~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/pipe.c

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /fs/pipe.c (Version linux-6.12-rc7) and /fs/pipe.c (Version linux-6.0.19)


  1 // SPDX-License-Identifier: GPL-2.0                 1 // SPDX-License-Identifier: GPL-2.0
  2 /*                                                  2 /*
  3  *  linux/fs/pipe.c                                 3  *  linux/fs/pipe.c
  4  *                                                  4  *
  5  *  Copyright (C) 1991, 1992, 1999  Linus Torv      5  *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
  6  */                                                 6  */
  7                                                     7 
  8 #include <linux/mm.h>                               8 #include <linux/mm.h>
  9 #include <linux/file.h>                             9 #include <linux/file.h>
 10 #include <linux/poll.h>                            10 #include <linux/poll.h>
 11 #include <linux/slab.h>                            11 #include <linux/slab.h>
 12 #include <linux/module.h>                          12 #include <linux/module.h>
 13 #include <linux/init.h>                            13 #include <linux/init.h>
 14 #include <linux/fs.h>                              14 #include <linux/fs.h>
 15 #include <linux/log2.h>                            15 #include <linux/log2.h>
 16 #include <linux/mount.h>                           16 #include <linux/mount.h>
 17 #include <linux/pseudo_fs.h>                       17 #include <linux/pseudo_fs.h>
 18 #include <linux/magic.h>                           18 #include <linux/magic.h>
 19 #include <linux/pipe_fs_i.h>                       19 #include <linux/pipe_fs_i.h>
 20 #include <linux/uio.h>                             20 #include <linux/uio.h>
 21 #include <linux/highmem.h>                         21 #include <linux/highmem.h>
 22 #include <linux/pagemap.h>                         22 #include <linux/pagemap.h>
 23 #include <linux/audit.h>                           23 #include <linux/audit.h>
 24 #include <linux/syscalls.h>                        24 #include <linux/syscalls.h>
 25 #include <linux/fcntl.h>                           25 #include <linux/fcntl.h>
 26 #include <linux/memcontrol.h>                      26 #include <linux/memcontrol.h>
 27 #include <linux/watch_queue.h>                     27 #include <linux/watch_queue.h>
 28 #include <linux/sysctl.h>                          28 #include <linux/sysctl.h>
 29                                                    29 
 30 #include <linux/uaccess.h>                         30 #include <linux/uaccess.h>
 31 #include <asm/ioctls.h>                            31 #include <asm/ioctls.h>
 32                                                    32 
 33 #include "internal.h"                              33 #include "internal.h"
 34                                                    34 
 35 /*                                                 35 /*
 36  * New pipe buffers will be restricted to this     36  * New pipe buffers will be restricted to this size while the user is exceeding
 37  * their pipe buffer quota. The general pipe u     37  * their pipe buffer quota. The general pipe use case needs at least two
 38  * buffers: one for data yet to be read, and o     38  * buffers: one for data yet to be read, and one for new data. If this is less
 39  * than two, then a write to a non-empty pipe      39  * than two, then a write to a non-empty pipe may block even if the pipe is not
 40  * full. This can occur with GNU make jobserve     40  * full. This can occur with GNU make jobserver or similar uses of pipes as
 41  * semaphores: multiple processes may be waiti     41  * semaphores: multiple processes may be waiting to write tokens back to the
 42  * pipe before reading tokens: https://lore.ke     42  * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/.
 43  *                                                 43  *
 44  * Users can reduce their pipe buffers with F_     44  * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their
 45  * own risk, namely: pipe writes to non-full p     45  * own risk, namely: pipe writes to non-full pipes may block until the pipe is
 46  * emptied.                                        46  * emptied.
 47  */                                                47  */
 48 #define PIPE_MIN_DEF_BUFFERS 2                     48 #define PIPE_MIN_DEF_BUFFERS 2
 49                                                    49 
 50 /*                                                 50 /*
 51  * The max size that a non-root user is allowe     51  * The max size that a non-root user is allowed to grow the pipe. Can
 52  * be set by root in /proc/sys/fs/pipe-max-siz     52  * be set by root in /proc/sys/fs/pipe-max-size
 53  */                                                53  */
 54 static unsigned int pipe_max_size = 1048576;       54 static unsigned int pipe_max_size = 1048576;
 55                                                    55 
 56 /* Maximum allocatable pages per user. Hard li     56 /* Maximum allocatable pages per user. Hard limit is unset by default, soft
 57  * matches default values.                         57  * matches default values.
 58  */                                                58  */
 59 static unsigned long pipe_user_pages_hard;         59 static unsigned long pipe_user_pages_hard;
 60 static unsigned long pipe_user_pages_soft = PI     60 static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
 61                                                    61 
 62 /*                                                 62 /*
 63  * We use head and tail indices that aren't ma     63  * We use head and tail indices that aren't masked off, except at the point of
 64  * dereference, but rather they're allowed to      64  * dereference, but rather they're allowed to wrap naturally.  This means there
 65  * isn't a dead spot in the buffer, but the ri     65  * isn't a dead spot in the buffer, but the ring has to be a power of two and
 66  * <= 2^31.                                        66  * <= 2^31.
 67  * -- David Howells 2019-09-23.                    67  * -- David Howells 2019-09-23.
 68  *                                                 68  *
 69  * Reads with count = 0 should always return 0     69  * Reads with count = 0 should always return 0.
 70  * -- Julian Bradfield 1999-06-07.                 70  * -- Julian Bradfield 1999-06-07.
 71  *                                                 71  *
 72  * FIFOs and Pipes now generate SIGIO for both     72  * FIFOs and Pipes now generate SIGIO for both readers and writers.
 73  * -- Jeremy Elson <jelson@circlemud.org> 2001     73  * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
 74  *                                                 74  *
 75  * pipe_read & write cleanup                       75  * pipe_read & write cleanup
 76  * -- Manfred Spraul <manfred@colorfullife.com     76  * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
 77  */                                                77  */
 78                                                    78 
 79 #define cmp_int(l, r)           ((l > r) - (l  !!  79 static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
 80                                                << 
 81 #ifdef CONFIG_PROVE_LOCKING                    << 
 82 static int pipe_lock_cmp_fn(const struct lockd << 
 83                             const struct lockd << 
 84 {                                                  80 {
 85         return cmp_int((unsigned long) a, (uns !!  81         if (pipe->files)
                                                   >>  82                 mutex_lock_nested(&pipe->mutex, subclass);
 86 }                                                  83 }
 87 #endif                                         << 
 88                                                    84 
 89 void pipe_lock(struct pipe_inode_info *pipe)       85 void pipe_lock(struct pipe_inode_info *pipe)
 90 {                                                  86 {
 91         if (pipe->files)                       !!  87         /*
 92                 mutex_lock(&pipe->mutex);      !!  88          * pipe_lock() nests non-pipe inode locks (for writing to a file)
                                                   >>  89          */
                                                   >>  90         pipe_lock_nested(pipe, I_MUTEX_PARENT);
 93 }                                                  91 }
 94 EXPORT_SYMBOL(pipe_lock);                          92 EXPORT_SYMBOL(pipe_lock);
 95                                                    93 
 96 void pipe_unlock(struct pipe_inode_info *pipe)     94 void pipe_unlock(struct pipe_inode_info *pipe)
 97 {                                                  95 {
 98         if (pipe->files)                           96         if (pipe->files)
 99                 mutex_unlock(&pipe->mutex);        97                 mutex_unlock(&pipe->mutex);
100 }                                                  98 }
101 EXPORT_SYMBOL(pipe_unlock);                        99 EXPORT_SYMBOL(pipe_unlock);
102                                                   100 
                                                   >> 101 static inline void __pipe_lock(struct pipe_inode_info *pipe)
                                                   >> 102 {
                                                   >> 103         mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
                                                   >> 104 }
                                                   >> 105 
                                                   >> 106 static inline void __pipe_unlock(struct pipe_inode_info *pipe)
                                                   >> 107 {
                                                   >> 108         mutex_unlock(&pipe->mutex);
                                                   >> 109 }
                                                   >> 110 
103 void pipe_double_lock(struct pipe_inode_info *    111 void pipe_double_lock(struct pipe_inode_info *pipe1,
104                       struct pipe_inode_info *    112                       struct pipe_inode_info *pipe2)
105 {                                                 113 {
106         BUG_ON(pipe1 == pipe2);                   114         BUG_ON(pipe1 == pipe2);
107                                                   115 
108         if (pipe1 > pipe2)                     !! 116         if (pipe1 < pipe2) {
109                 swap(pipe1, pipe2);            !! 117                 pipe_lock_nested(pipe1, I_MUTEX_PARENT);
110                                                !! 118                 pipe_lock_nested(pipe2, I_MUTEX_CHILD);
111         pipe_lock(pipe1);                      !! 119         } else {
112         pipe_lock(pipe2);                      !! 120                 pipe_lock_nested(pipe2, I_MUTEX_PARENT);
                                                   >> 121                 pipe_lock_nested(pipe1, I_MUTEX_CHILD);
                                                   >> 122         }
113 }                                                 123 }
114                                                   124 
115 static void anon_pipe_buf_release(struct pipe_    125 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
116                                   struct pipe_    126                                   struct pipe_buffer *buf)
117 {                                                 127 {
118         struct page *page = buf->page;            128         struct page *page = buf->page;
119                                                   129 
120         /*                                        130         /*
121          * If nobody else uses this page, and     131          * If nobody else uses this page, and we don't already have a
122          * temporary page, let's keep track of    132          * temporary page, let's keep track of it as a one-deep
123          * allocation cache. (Otherwise just r    133          * allocation cache. (Otherwise just release our reference to it)
124          */                                       134          */
125         if (page_count(page) == 1 && !pipe->tm    135         if (page_count(page) == 1 && !pipe->tmp_page)
126                 pipe->tmp_page = page;            136                 pipe->tmp_page = page;
127         else                                      137         else
128                 put_page(page);                   138                 put_page(page);
129 }                                                 139 }
130                                                   140 
131 static bool anon_pipe_buf_try_steal(struct pip    141 static bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe,
132                 struct pipe_buffer *buf)          142                 struct pipe_buffer *buf)
133 {                                                 143 {
134         struct page *page = buf->page;            144         struct page *page = buf->page;
135                                                   145 
136         if (page_count(page) != 1)                146         if (page_count(page) != 1)
137                 return false;                     147                 return false;
138         memcg_kmem_uncharge_page(page, 0);        148         memcg_kmem_uncharge_page(page, 0);
139         __SetPageLocked(page);                    149         __SetPageLocked(page);
140         return true;                              150         return true;
141 }                                                 151 }
142                                                   152 
143 /**                                               153 /**
144  * generic_pipe_buf_try_steal - attempt to tak    154  * generic_pipe_buf_try_steal - attempt to take ownership of a &pipe_buffer
145  * @pipe:       the pipe that the buffer belon    155  * @pipe:       the pipe that the buffer belongs to
146  * @buf:        the buffer to attempt to steal    156  * @buf:        the buffer to attempt to steal
147  *                                                157  *
148  * Description:                                   158  * Description:
149  *      This function attempts to steal the &s    159  *      This function attempts to steal the &struct page attached to
150  *      @buf. If successful, this function ret    160  *      @buf. If successful, this function returns 0 and returns with
151  *      the page locked. The caller may then r    161  *      the page locked. The caller may then reuse the page for whatever
152  *      he wishes; the typical use is insertio    162  *      he wishes; the typical use is insertion into a different file
153  *      page cache.                               163  *      page cache.
154  */                                               164  */
155 bool generic_pipe_buf_try_steal(struct pipe_in    165 bool generic_pipe_buf_try_steal(struct pipe_inode_info *pipe,
156                 struct pipe_buffer *buf)          166                 struct pipe_buffer *buf)
157 {                                                 167 {
158         struct page *page = buf->page;            168         struct page *page = buf->page;
159                                                   169 
160         /*                                        170         /*
161          * A reference of one is golden, that     171          * A reference of one is golden, that means that the owner of this
162          * page is the only one holding a refe    172          * page is the only one holding a reference to it. lock the page
163          * and return OK.                         173          * and return OK.
164          */                                       174          */
165         if (page_count(page) == 1) {              175         if (page_count(page) == 1) {
166                 lock_page(page);                  176                 lock_page(page);
167                 return true;                      177                 return true;
168         }                                         178         }
169         return false;                             179         return false;
170 }                                                 180 }
171 EXPORT_SYMBOL(generic_pipe_buf_try_steal);        181 EXPORT_SYMBOL(generic_pipe_buf_try_steal);
172                                                   182 
173 /**                                               183 /**
174  * generic_pipe_buf_get - get a reference to a    184  * generic_pipe_buf_get - get a reference to a &struct pipe_buffer
175  * @pipe:       the pipe that the buffer belon    185  * @pipe:       the pipe that the buffer belongs to
176  * @buf:        the buffer to get a reference     186  * @buf:        the buffer to get a reference to
177  *                                                187  *
178  * Description:                                   188  * Description:
179  *      This function grabs an extra reference    189  *      This function grabs an extra reference to @buf. It's used in
180  *      the tee() system call, when we duplica    190  *      the tee() system call, when we duplicate the buffers in one
181  *      pipe into another.                        191  *      pipe into another.
182  */                                               192  */
183 bool generic_pipe_buf_get(struct pipe_inode_in    193 bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
184 {                                                 194 {
185         return try_get_page(buf->page);           195         return try_get_page(buf->page);
186 }                                                 196 }
187 EXPORT_SYMBOL(generic_pipe_buf_get);              197 EXPORT_SYMBOL(generic_pipe_buf_get);
188                                                   198 
189 /**                                               199 /**
190  * generic_pipe_buf_release - put a reference     200  * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
191  * @pipe:       the pipe that the buffer belon    201  * @pipe:       the pipe that the buffer belongs to
192  * @buf:        the buffer to put a reference     202  * @buf:        the buffer to put a reference to
193  *                                                203  *
194  * Description:                                   204  * Description:
195  *      This function releases a reference to     205  *      This function releases a reference to @buf.
196  */                                               206  */
197 void generic_pipe_buf_release(struct pipe_inod    207 void generic_pipe_buf_release(struct pipe_inode_info *pipe,
198                               struct pipe_buff    208                               struct pipe_buffer *buf)
199 {                                                 209 {
200         put_page(buf->page);                      210         put_page(buf->page);
201 }                                                 211 }
202 EXPORT_SYMBOL(generic_pipe_buf_release);          212 EXPORT_SYMBOL(generic_pipe_buf_release);
203                                                   213 
204 static const struct pipe_buf_operations anon_p    214 static const struct pipe_buf_operations anon_pipe_buf_ops = {
205         .release        = anon_pipe_buf_releas    215         .release        = anon_pipe_buf_release,
206         .try_steal      = anon_pipe_buf_try_st    216         .try_steal      = anon_pipe_buf_try_steal,
207         .get            = generic_pipe_buf_get    217         .get            = generic_pipe_buf_get,
208 };                                                218 };
209                                                   219 
210 /* Done while waiting without holding the pipe    220 /* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
211 static inline bool pipe_readable(const struct     221 static inline bool pipe_readable(const struct pipe_inode_info *pipe)
212 {                                                 222 {
213         unsigned int head = READ_ONCE(pipe->he    223         unsigned int head = READ_ONCE(pipe->head);
214         unsigned int tail = READ_ONCE(pipe->ta    224         unsigned int tail = READ_ONCE(pipe->tail);
215         unsigned int writers = READ_ONCE(pipe-    225         unsigned int writers = READ_ONCE(pipe->writers);
216                                                   226 
217         return !pipe_empty(head, tail) || !wri    227         return !pipe_empty(head, tail) || !writers;
218 }                                                 228 }
219                                                   229 
220 static inline unsigned int pipe_update_tail(st << 
221                                             st << 
222                                             un << 
223 {                                              << 
224         pipe_buf_release(pipe, buf);           << 
225                                                << 
226         /*                                     << 
227          * If the pipe has a watch_queue, we n << 
228          * by the spinlock because notificatio << 
229          * this spinlock, no mutex             << 
230          */                                    << 
231         if (pipe_has_watch_queue(pipe)) {      << 
232                 spin_lock_irq(&pipe->rd_wait.l << 
233 #ifdef CONFIG_WATCH_QUEUE                      << 
234                 if (buf->flags & PIPE_BUF_FLAG << 
235                         pipe->note_loss = true << 
236 #endif                                         << 
237                 pipe->tail = ++tail;           << 
238                 spin_unlock_irq(&pipe->rd_wait << 
239                 return tail;                   << 
240         }                                      << 
241                                                << 
242         /*                                     << 
243          * Without a watch_queue, we can simpl << 
244          * without the spinlock - the mutex is << 
245          */                                    << 
246         pipe->tail = ++tail;                   << 
247         return tail;                           << 
248 }                                              << 
249                                                << 
250 static ssize_t                                    230 static ssize_t
251 pipe_read(struct kiocb *iocb, struct iov_iter     231 pipe_read(struct kiocb *iocb, struct iov_iter *to)
252 {                                                 232 {
253         size_t total_len = iov_iter_count(to);    233         size_t total_len = iov_iter_count(to);
254         struct file *filp = iocb->ki_filp;        234         struct file *filp = iocb->ki_filp;
255         struct pipe_inode_info *pipe = filp->p    235         struct pipe_inode_info *pipe = filp->private_data;
256         bool was_full, wake_next_reader = fals    236         bool was_full, wake_next_reader = false;
257         ssize_t ret;                              237         ssize_t ret;
258                                                   238 
259         /* Null read succeeds. */                 239         /* Null read succeeds. */
260         if (unlikely(total_len == 0))             240         if (unlikely(total_len == 0))
261                 return 0;                         241                 return 0;
262                                                   242 
263         ret = 0;                                  243         ret = 0;
264         mutex_lock(&pipe->mutex);              !! 244         __pipe_lock(pipe);
265                                                   245 
266         /*                                        246         /*
267          * We only wake up writers if the pipe    247          * We only wake up writers if the pipe was full when we started
268          * reading in order to avoid unnecessa    248          * reading in order to avoid unnecessary wakeups.
269          *                                        249          *
270          * But when we do wake up writers, we     250          * But when we do wake up writers, we do so using a sync wakeup
271          * (WF_SYNC), because we want them to     251          * (WF_SYNC), because we want them to get going and generate more
272          * data for us.                           252          * data for us.
273          */                                       253          */
274         was_full = pipe_full(pipe->head, pipe-    254         was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
275         for (;;) {                                255         for (;;) {
276                 /* Read ->head with a barrier     256                 /* Read ->head with a barrier vs post_one_notification() */
277                 unsigned int head = smp_load_a    257                 unsigned int head = smp_load_acquire(&pipe->head);
278                 unsigned int tail = pipe->tail    258                 unsigned int tail = pipe->tail;
279                 unsigned int mask = pipe->ring    259                 unsigned int mask = pipe->ring_size - 1;
280                                                   260 
281 #ifdef CONFIG_WATCH_QUEUE                         261 #ifdef CONFIG_WATCH_QUEUE
282                 if (pipe->note_loss) {            262                 if (pipe->note_loss) {
283                         struct watch_notificat    263                         struct watch_notification n;
284                                                   264 
285                         if (total_len < 8) {      265                         if (total_len < 8) {
286                                 if (ret == 0)     266                                 if (ret == 0)
287                                         ret =     267                                         ret = -ENOBUFS;
288                                 break;            268                                 break;
289                         }                         269                         }
290                                                   270 
291                         n.type = WATCH_TYPE_ME    271                         n.type = WATCH_TYPE_META;
292                         n.subtype = WATCH_META    272                         n.subtype = WATCH_META_LOSS_NOTIFICATION;
293                         n.info = watch_sizeof(    273                         n.info = watch_sizeof(n);
294                         if (copy_to_iter(&n, s    274                         if (copy_to_iter(&n, sizeof(n), to) != sizeof(n)) {
295                                 if (ret == 0)     275                                 if (ret == 0)
296                                         ret =     276                                         ret = -EFAULT;
297                                 break;            277                                 break;
298                         }                         278                         }
299                         ret += sizeof(n);         279                         ret += sizeof(n);
300                         total_len -= sizeof(n)    280                         total_len -= sizeof(n);
301                         pipe->note_loss = fals    281                         pipe->note_loss = false;
302                 }                                 282                 }
303 #endif                                            283 #endif
304                                                   284 
305                 if (!pipe_empty(head, tail)) {    285                 if (!pipe_empty(head, tail)) {
306                         struct pipe_buffer *bu    286                         struct pipe_buffer *buf = &pipe->bufs[tail & mask];
307                         size_t chars = buf->le    287                         size_t chars = buf->len;
308                         size_t written;           288                         size_t written;
309                         int error;                289                         int error;
310                                                   290 
311                         if (chars > total_len)    291                         if (chars > total_len) {
312                                 if (buf->flags    292                                 if (buf->flags & PIPE_BUF_FLAG_WHOLE) {
313                                         if (re    293                                         if (ret == 0)
314                                                   294                                                 ret = -ENOBUFS;
315                                         break;    295                                         break;
316                                 }                 296                                 }
317                                 chars = total_    297                                 chars = total_len;
318                         }                         298                         }
319                                                   299 
320                         error = pipe_buf_confi    300                         error = pipe_buf_confirm(pipe, buf);
321                         if (error) {              301                         if (error) {
322                                 if (!ret)         302                                 if (!ret)
323                                         ret =     303                                         ret = error;
324                                 break;            304                                 break;
325                         }                         305                         }
326                                                   306 
327                         written = copy_page_to    307                         written = copy_page_to_iter(buf->page, buf->offset, chars, to);
328                         if (unlikely(written <    308                         if (unlikely(written < chars)) {
329                                 if (!ret)         309                                 if (!ret)
330                                         ret =     310                                         ret = -EFAULT;
331                                 break;            311                                 break;
332                         }                         312                         }
333                         ret += chars;             313                         ret += chars;
334                         buf->offset += chars;     314                         buf->offset += chars;
335                         buf->len -= chars;        315                         buf->len -= chars;
336                                                   316 
337                         /* Was it a packet buf    317                         /* Was it a packet buffer? Clean up and exit */
338                         if (buf->flags & PIPE_    318                         if (buf->flags & PIPE_BUF_FLAG_PACKET) {
339                                 total_len = ch    319                                 total_len = chars;
340                                 buf->len = 0;     320                                 buf->len = 0;
341                         }                         321                         }
342                                                   322 
343                         if (!buf->len)         !! 323                         if (!buf->len) {
344                                 tail = pipe_up !! 324                                 pipe_buf_release(pipe, buf);
                                                   >> 325                                 spin_lock_irq(&pipe->rd_wait.lock);
                                                   >> 326 #ifdef CONFIG_WATCH_QUEUE
                                                   >> 327                                 if (buf->flags & PIPE_BUF_FLAG_LOSS)
                                                   >> 328                                         pipe->note_loss = true;
                                                   >> 329 #endif
                                                   >> 330                                 tail++;
                                                   >> 331                                 pipe->tail = tail;
                                                   >> 332                                 spin_unlock_irq(&pipe->rd_wait.lock);
                                                   >> 333                         }
345                         total_len -= chars;       334                         total_len -= chars;
346                         if (!total_len)           335                         if (!total_len)
347                                 break;  /* com    336                                 break;  /* common path: read succeeded */
348                         if (!pipe_empty(head,     337                         if (!pipe_empty(head, tail))    /* More to do? */
349                                 continue;         338                                 continue;
350                 }                                 339                 }
351                                                   340 
352                 if (!pipe->writers)               341                 if (!pipe->writers)
353                         break;                    342                         break;
354                 if (ret)                          343                 if (ret)
355                         break;                    344                         break;
356                 if ((filp->f_flags & O_NONBLOC !! 345                 if (filp->f_flags & O_NONBLOCK) {
357                     (iocb->ki_flags & IOCB_NOW << 
358                         ret = -EAGAIN;            346                         ret = -EAGAIN;
359                         break;                    347                         break;
360                 }                                 348                 }
361                 mutex_unlock(&pipe->mutex);    !! 349                 __pipe_unlock(pipe);
362                                                   350 
363                 /*                                351                 /*
364                  * We only get here if we didn    352                  * We only get here if we didn't actually read anything.
365                  *                                353                  *
366                  * However, we could have seen    354                  * However, we could have seen (and removed) a zero-sized
367                  * pipe buffer, and might have    355                  * pipe buffer, and might have made space in the buffers
368                  * that way.                      356                  * that way.
369                  *                                357                  *
370                  * You can't make zero-sized p    358                  * You can't make zero-sized pipe buffers by doing an empty
371                  * write (not even in packet m    359                  * write (not even in packet mode), but they can happen if
372                  * the writer gets an EFAULT w    360                  * the writer gets an EFAULT when trying to fill a buffer
373                  * that already got allocated     361                  * that already got allocated and inserted in the buffer
374                  * array.                         362                  * array.
375                  *                                363                  *
376                  * So we still need to wake up    364                  * So we still need to wake up any pending writers in the
377                  * _very_ unlikely case that t    365                  * _very_ unlikely case that the pipe was full, but we got
378                  * no data.                       366                  * no data.
379                  */                               367                  */
380                 if (unlikely(was_full))           368                 if (unlikely(was_full))
381                         wake_up_interruptible_    369                         wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
382                 kill_fasync(&pipe->fasync_writ    370                 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
383                                                   371 
384                 /*                                372                 /*
385                  * But because we didn't read     373                  * But because we didn't read anything, at this point we can
386                  * just return directly with -    374                  * just return directly with -ERESTARTSYS if we're interrupted,
387                  * since we've done any requir    375                  * since we've done any required wakeups and there's no need
388                  * to mark anything accessed.     376                  * to mark anything accessed. And we've dropped the lock.
389                  */                               377                  */
390                 if (wait_event_interruptible_e    378                 if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
391                         return -ERESTARTSYS;      379                         return -ERESTARTSYS;
392                                                   380 
393                 mutex_lock(&pipe->mutex);      !! 381                 __pipe_lock(pipe);
394                 was_full = pipe_full(pipe->hea    382                 was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
395                 wake_next_reader = true;          383                 wake_next_reader = true;
396         }                                         384         }
397         if (pipe_empty(pipe->head, pipe->tail)    385         if (pipe_empty(pipe->head, pipe->tail))
398                 wake_next_reader = false;         386                 wake_next_reader = false;
399         mutex_unlock(&pipe->mutex);            !! 387         __pipe_unlock(pipe);
400                                                   388 
401         if (was_full)                             389         if (was_full)
402                 wake_up_interruptible_sync_pol    390                 wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
403         if (wake_next_reader)                     391         if (wake_next_reader)
404                 wake_up_interruptible_sync_pol    392                 wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
405         kill_fasync(&pipe->fasync_writers, SIG    393         kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
406         if (ret > 0)                              394         if (ret > 0)
407                 file_accessed(filp);              395                 file_accessed(filp);
408         return ret;                               396         return ret;
409 }                                                 397 }
410                                                   398 
411 static inline int is_packetized(struct file *f    399 static inline int is_packetized(struct file *file)
412 {                                                 400 {
413         return (file->f_flags & O_DIRECT) != 0    401         return (file->f_flags & O_DIRECT) != 0;
414 }                                                 402 }
415                                                   403 
416 /* Done while waiting without holding the pipe    404 /* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
417 static inline bool pipe_writable(const struct     405 static inline bool pipe_writable(const struct pipe_inode_info *pipe)
418 {                                                 406 {
419         unsigned int head = READ_ONCE(pipe->he    407         unsigned int head = READ_ONCE(pipe->head);
420         unsigned int tail = READ_ONCE(pipe->ta    408         unsigned int tail = READ_ONCE(pipe->tail);
421         unsigned int max_usage = READ_ONCE(pip    409         unsigned int max_usage = READ_ONCE(pipe->max_usage);
422                                                   410 
423         return !pipe_full(head, tail, max_usag    411         return !pipe_full(head, tail, max_usage) ||
424                 !READ_ONCE(pipe->readers);        412                 !READ_ONCE(pipe->readers);
425 }                                                 413 }
426                                                   414 
427 static ssize_t                                    415 static ssize_t
428 pipe_write(struct kiocb *iocb, struct iov_iter    416 pipe_write(struct kiocb *iocb, struct iov_iter *from)
429 {                                                 417 {
430         struct file *filp = iocb->ki_filp;        418         struct file *filp = iocb->ki_filp;
431         struct pipe_inode_info *pipe = filp->p    419         struct pipe_inode_info *pipe = filp->private_data;
432         unsigned int head;                        420         unsigned int head;
433         ssize_t ret = 0;                          421         ssize_t ret = 0;
434         size_t total_len = iov_iter_count(from    422         size_t total_len = iov_iter_count(from);
435         ssize_t chars;                            423         ssize_t chars;
436         bool was_empty = false;                   424         bool was_empty = false;
437         bool wake_next_writer = false;            425         bool wake_next_writer = false;
438                                                   426 
439         /*                                     << 
440          * Reject writing to watch queue pipes << 
441          * the pipe.                           << 
442          * Otherwise, lockdep would be unhappy << 
443          * pipe locked.                        << 
444          * If we had to support locking a norm << 
445          * the same time, we could set up lock << 
446          * since we don't actually need that,  << 
447          */                                    << 
448         if (pipe_has_watch_queue(pipe))        << 
449                 return -EXDEV;                 << 
450                                                << 
451         /* Null write succeeds. */                427         /* Null write succeeds. */
452         if (unlikely(total_len == 0))             428         if (unlikely(total_len == 0))
453                 return 0;                         429                 return 0;
454                                                   430 
455         mutex_lock(&pipe->mutex);              !! 431         __pipe_lock(pipe);
456                                                   432 
457         if (!pipe->readers) {                     433         if (!pipe->readers) {
458                 send_sig(SIGPIPE, current, 0);    434                 send_sig(SIGPIPE, current, 0);
459                 ret = -EPIPE;                     435                 ret = -EPIPE;
460                 goto out;                         436                 goto out;
461         }                                         437         }
462                                                   438 
                                                   >> 439 #ifdef CONFIG_WATCH_QUEUE
                                                   >> 440         if (pipe->watch_queue) {
                                                   >> 441                 ret = -EXDEV;
                                                   >> 442                 goto out;
                                                   >> 443         }
                                                   >> 444 #endif
                                                   >> 445 
463         /*                                        446         /*
464          * If it wasn't empty we try to merge     447          * If it wasn't empty we try to merge new data into
465          * the last buffer.                       448          * the last buffer.
466          *                                        449          *
467          * That naturally merges small writes,    450          * That naturally merges small writes, but it also
468          * page-aligns the rest of the writes     451          * page-aligns the rest of the writes for large writes
469          * spanning multiple pages.               452          * spanning multiple pages.
470          */                                       453          */
471         head = pipe->head;                        454         head = pipe->head;
472         was_empty = pipe_empty(head, pipe->tai    455         was_empty = pipe_empty(head, pipe->tail);
473         chars = total_len & (PAGE_SIZE-1);        456         chars = total_len & (PAGE_SIZE-1);
474         if (chars && !was_empty) {                457         if (chars && !was_empty) {
475                 unsigned int mask = pipe->ring    458                 unsigned int mask = pipe->ring_size - 1;
476                 struct pipe_buffer *buf = &pip    459                 struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
477                 int offset = buf->offset + buf    460                 int offset = buf->offset + buf->len;
478                                                   461 
479                 if ((buf->flags & PIPE_BUF_FLA    462                 if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
480                     offset + chars <= PAGE_SIZ    463                     offset + chars <= PAGE_SIZE) {
481                         ret = pipe_buf_confirm    464                         ret = pipe_buf_confirm(pipe, buf);
482                         if (ret)                  465                         if (ret)
483                                 goto out;         466                                 goto out;
484                                                   467 
485                         ret = copy_page_from_i    468                         ret = copy_page_from_iter(buf->page, offset, chars, from);
486                         if (unlikely(ret < cha    469                         if (unlikely(ret < chars)) {
487                                 ret = -EFAULT;    470                                 ret = -EFAULT;
488                                 goto out;         471                                 goto out;
489                         }                         472                         }
490                                                   473 
491                         buf->len += ret;          474                         buf->len += ret;
492                         if (!iov_iter_count(fr    475                         if (!iov_iter_count(from))
493                                 goto out;         476                                 goto out;
494                 }                                 477                 }
495         }                                         478         }
496                                                   479 
497         for (;;) {                                480         for (;;) {
498                 if (!pipe->readers) {             481                 if (!pipe->readers) {
499                         send_sig(SIGPIPE, curr    482                         send_sig(SIGPIPE, current, 0);
500                         if (!ret)                 483                         if (!ret)
501                                 ret = -EPIPE;     484                                 ret = -EPIPE;
502                         break;                    485                         break;
503                 }                                 486                 }
504                                                   487 
505                 head = pipe->head;                488                 head = pipe->head;
506                 if (!pipe_full(head, pipe->tai    489                 if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
507                         unsigned int mask = pi    490                         unsigned int mask = pipe->ring_size - 1;
508                         struct pipe_buffer *bu !! 491                         struct pipe_buffer *buf = &pipe->bufs[head & mask];
509                         struct page *page = pi    492                         struct page *page = pipe->tmp_page;
510                         int copied;               493                         int copied;
511                                                   494 
512                         if (!page) {              495                         if (!page) {
513                                 page = alloc_p    496                                 page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
514                                 if (unlikely(!    497                                 if (unlikely(!page)) {
515                                         ret =     498                                         ret = ret ? : -ENOMEM;
516                                         break;    499                                         break;
517                                 }                 500                                 }
518                                 pipe->tmp_page    501                                 pipe->tmp_page = page;
519                         }                         502                         }
520                                                   503 
521                         /* Allocate a slot in     504                         /* Allocate a slot in the ring in advance and attach an
522                          * empty buffer.  If w    505                          * empty buffer.  If we fault or otherwise fail to use
523                          * it, either the read    506                          * it, either the reader will consume it or it'll still
524                          * be there for the ne    507                          * be there for the next write.
525                          */                       508                          */
                                                   >> 509                         spin_lock_irq(&pipe->rd_wait.lock);
                                                   >> 510 
                                                   >> 511                         head = pipe->head;
                                                   >> 512                         if (pipe_full(head, pipe->tail, pipe->max_usage)) {
                                                   >> 513                                 spin_unlock_irq(&pipe->rd_wait.lock);
                                                   >> 514                                 continue;
                                                   >> 515                         }
                                                   >> 516 
526                         pipe->head = head + 1;    517                         pipe->head = head + 1;
                                                   >> 518                         spin_unlock_irq(&pipe->rd_wait.lock);
527                                                   519 
528                         /* Insert it into the     520                         /* Insert it into the buffer array */
529                         buf = &pipe->bufs[head    521                         buf = &pipe->bufs[head & mask];
530                         buf->page = page;         522                         buf->page = page;
531                         buf->ops = &anon_pipe_    523                         buf->ops = &anon_pipe_buf_ops;
532                         buf->offset = 0;          524                         buf->offset = 0;
533                         buf->len = 0;             525                         buf->len = 0;
534                         if (is_packetized(filp    526                         if (is_packetized(filp))
535                                 buf->flags = P    527                                 buf->flags = PIPE_BUF_FLAG_PACKET;
536                         else                      528                         else
537                                 buf->flags = P    529                                 buf->flags = PIPE_BUF_FLAG_CAN_MERGE;
538                         pipe->tmp_page = NULL;    530                         pipe->tmp_page = NULL;
539                                                   531 
540                         copied = copy_page_fro    532                         copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
541                         if (unlikely(copied <     533                         if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
542                                 if (!ret)         534                                 if (!ret)
543                                         ret =     535                                         ret = -EFAULT;
544                                 break;            536                                 break;
545                         }                         537                         }
546                         ret += copied;            538                         ret += copied;
                                                   >> 539                         buf->offset = 0;
547                         buf->len = copied;        540                         buf->len = copied;
548                                                   541 
549                         if (!iov_iter_count(fr    542                         if (!iov_iter_count(from))
550                                 break;            543                                 break;
551                 }                                 544                 }
552                                                   545 
553                 if (!pipe_full(head, pipe->tai    546                 if (!pipe_full(head, pipe->tail, pipe->max_usage))
554                         continue;                 547                         continue;
555                                                   548 
556                 /* Wait for buffer space to be    549                 /* Wait for buffer space to become available. */
557                 if ((filp->f_flags & O_NONBLOC !! 550                 if (filp->f_flags & O_NONBLOCK) {
558                     (iocb->ki_flags & IOCB_NOW << 
559                         if (!ret)                 551                         if (!ret)
560                                 ret = -EAGAIN;    552                                 ret = -EAGAIN;
561                         break;                    553                         break;
562                 }                                 554                 }
563                 if (signal_pending(current)) {    555                 if (signal_pending(current)) {
564                         if (!ret)                 556                         if (!ret)
565                                 ret = -ERESTAR    557                                 ret = -ERESTARTSYS;
566                         break;                    558                         break;
567                 }                                 559                 }
568                                                   560 
569                 /*                                561                 /*
570                  * We're going to release the     562                  * We're going to release the pipe lock and wait for more
571                  * space. We wake up any reade    563                  * space. We wake up any readers if necessary, and then
572                  * after waiting we need to re    564                  * after waiting we need to re-check whether the pipe
573                  * become empty while we dropp    565                  * become empty while we dropped the lock.
574                  */                               566                  */
575                 mutex_unlock(&pipe->mutex);    !! 567                 __pipe_unlock(pipe);
576                 if (was_empty)                    568                 if (was_empty)
577                         wake_up_interruptible_    569                         wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
578                 kill_fasync(&pipe->fasync_read    570                 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
579                 wait_event_interruptible_exclu    571                 wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
580                 mutex_lock(&pipe->mutex);      !! 572                 __pipe_lock(pipe);
581                 was_empty = pipe_empty(pipe->h    573                 was_empty = pipe_empty(pipe->head, pipe->tail);
582                 wake_next_writer = true;          574                 wake_next_writer = true;
583         }                                         575         }
584 out:                                              576 out:
585         if (pipe_full(pipe->head, pipe->tail,     577         if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
586                 wake_next_writer = false;         578                 wake_next_writer = false;
587         mutex_unlock(&pipe->mutex);            !! 579         __pipe_unlock(pipe);
588                                                   580 
589         /*                                        581         /*
590          * If we do do a wakeup event, we do a    582          * If we do do a wakeup event, we do a 'sync' wakeup, because we
591          * want the reader to start processing    583          * want the reader to start processing things asap, rather than
592          * leave the data pending.                584          * leave the data pending.
593          *                                        585          *
594          * This is particularly important for     586          * This is particularly important for small writes, because of
595          * how (for example) the GNU make jobs    587          * how (for example) the GNU make jobserver uses small writes to
596          * wake up pending jobs                   588          * wake up pending jobs
597          *                                        589          *
598          * Epoll nonsensically wants a wakeup     590          * Epoll nonsensically wants a wakeup whether the pipe
599          * was already empty or not.              591          * was already empty or not.
600          */                                       592          */
601         if (was_empty || pipe->poll_usage)        593         if (was_empty || pipe->poll_usage)
602                 wake_up_interruptible_sync_pol    594                 wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
603         kill_fasync(&pipe->fasync_readers, SIG    595         kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
604         if (wake_next_writer)                     596         if (wake_next_writer)
605                 wake_up_interruptible_sync_pol    597                 wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
606         if (ret > 0 && sb_start_write_trylock(    598         if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
607                 int err = file_update_time(fil    599                 int err = file_update_time(filp);
608                 if (err)                          600                 if (err)
609                         ret = err;                601                         ret = err;
610                 sb_end_write(file_inode(filp)-    602                 sb_end_write(file_inode(filp)->i_sb);
611         }                                         603         }
612         return ret;                               604         return ret;
613 }                                                 605 }
614                                                   606 
615 static long pipe_ioctl(struct file *filp, unsi    607 static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
616 {                                                 608 {
617         struct pipe_inode_info *pipe = filp->p    609         struct pipe_inode_info *pipe = filp->private_data;
618         unsigned int count, head, tail, mask;     610         unsigned int count, head, tail, mask;
619                                                   611 
620         switch (cmd) {                            612         switch (cmd) {
621         case FIONREAD:                            613         case FIONREAD:
622                 mutex_lock(&pipe->mutex);      !! 614                 __pipe_lock(pipe);
623                 count = 0;                        615                 count = 0;
624                 head = pipe->head;                616                 head = pipe->head;
625                 tail = pipe->tail;                617                 tail = pipe->tail;
626                 mask = pipe->ring_size - 1;       618                 mask = pipe->ring_size - 1;
627                                                   619 
628                 while (tail != head) {            620                 while (tail != head) {
629                         count += pipe->bufs[ta    621                         count += pipe->bufs[tail & mask].len;
630                         tail++;                   622                         tail++;
631                 }                                 623                 }
632                 mutex_unlock(&pipe->mutex);    !! 624                 __pipe_unlock(pipe);
633                                                   625 
634                 return put_user(count, (int __    626                 return put_user(count, (int __user *)arg);
635                                                   627 
636 #ifdef CONFIG_WATCH_QUEUE                         628 #ifdef CONFIG_WATCH_QUEUE
637         case IOC_WATCH_QUEUE_SET_SIZE: {          629         case IOC_WATCH_QUEUE_SET_SIZE: {
638                 int ret;                          630                 int ret;
639                 mutex_lock(&pipe->mutex);      !! 631                 __pipe_lock(pipe);
640                 ret = watch_queue_set_size(pip    632                 ret = watch_queue_set_size(pipe, arg);
641                 mutex_unlock(&pipe->mutex);    !! 633                 __pipe_unlock(pipe);
642                 return ret;                       634                 return ret;
643         }                                         635         }
644                                                   636 
645         case IOC_WATCH_QUEUE_SET_FILTER:          637         case IOC_WATCH_QUEUE_SET_FILTER:
646                 return watch_queue_set_filter(    638                 return watch_queue_set_filter(
647                         pipe, (struct watch_no    639                         pipe, (struct watch_notification_filter __user *)arg);
648 #endif                                            640 #endif
649                                                   641 
650         default:                                  642         default:
651                 return -ENOIOCTLCMD;              643                 return -ENOIOCTLCMD;
652         }                                         644         }
653 }                                                 645 }
654                                                   646 
655 /* No kernel lock held - fine */                  647 /* No kernel lock held - fine */
656 static __poll_t                                   648 static __poll_t
657 pipe_poll(struct file *filp, poll_table *wait)    649 pipe_poll(struct file *filp, poll_table *wait)
658 {                                                 650 {
659         __poll_t mask;                            651         __poll_t mask;
660         struct pipe_inode_info *pipe = filp->p    652         struct pipe_inode_info *pipe = filp->private_data;
661         unsigned int head, tail;                  653         unsigned int head, tail;
662                                                   654 
663         /* Epoll has some historical nasty sem    655         /* Epoll has some historical nasty semantics, this enables them */
664         WRITE_ONCE(pipe->poll_usage, true);       656         WRITE_ONCE(pipe->poll_usage, true);
665                                                   657 
666         /*                                        658         /*
667          * Reading pipe state only -- no need     659          * Reading pipe state only -- no need for acquiring the semaphore.
668          *                                        660          *
669          * But because this is racy, the code     661          * But because this is racy, the code has to add the
670          * entry to the poll table _first_ ..     662          * entry to the poll table _first_ ..
671          */                                       663          */
672         if (filp->f_mode & FMODE_READ)            664         if (filp->f_mode & FMODE_READ)
673                 poll_wait(filp, &pipe->rd_wait    665                 poll_wait(filp, &pipe->rd_wait, wait);
674         if (filp->f_mode & FMODE_WRITE)           666         if (filp->f_mode & FMODE_WRITE)
675                 poll_wait(filp, &pipe->wr_wait    667                 poll_wait(filp, &pipe->wr_wait, wait);
676                                                   668 
677         /*                                        669         /*
678          * .. and only then can you do the rac    670          * .. and only then can you do the racy tests. That way,
679          * if something changes and you got it    671          * if something changes and you got it wrong, the poll
680          * table entry will wake you up and fi    672          * table entry will wake you up and fix it.
681          */                                       673          */
682         head = READ_ONCE(pipe->head);             674         head = READ_ONCE(pipe->head);
683         tail = READ_ONCE(pipe->tail);             675         tail = READ_ONCE(pipe->tail);
684                                                   676 
685         mask = 0;                                 677         mask = 0;
686         if (filp->f_mode & FMODE_READ) {          678         if (filp->f_mode & FMODE_READ) {
687                 if (!pipe_empty(head, tail))      679                 if (!pipe_empty(head, tail))
688                         mask |= EPOLLIN | EPOL    680                         mask |= EPOLLIN | EPOLLRDNORM;
689                 if (!pipe->writers && filp->f_ !! 681                 if (!pipe->writers && filp->f_version != pipe->w_counter)
690                         mask |= EPOLLHUP;         682                         mask |= EPOLLHUP;
691         }                                         683         }
692                                                   684 
693         if (filp->f_mode & FMODE_WRITE) {         685         if (filp->f_mode & FMODE_WRITE) {
694                 if (!pipe_full(head, tail, pip    686                 if (!pipe_full(head, tail, pipe->max_usage))
695                         mask |= EPOLLOUT | EPO    687                         mask |= EPOLLOUT | EPOLLWRNORM;
696                 /*                                688                 /*
697                  * Most Unices do not set EPOL    689                  * Most Unices do not set EPOLLERR for FIFOs but on Linux they
698                  * behave exactly like pipes f    690                  * behave exactly like pipes for poll().
699                  */                               691                  */
700                 if (!pipe->readers)               692                 if (!pipe->readers)
701                         mask |= EPOLLERR;         693                         mask |= EPOLLERR;
702         }                                         694         }
703                                                   695 
704         return mask;                              696         return mask;
705 }                                                 697 }
706                                                   698 
707 static void put_pipe_info(struct inode *inode,    699 static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
708 {                                                 700 {
709         int kill = 0;                             701         int kill = 0;
710                                                   702 
711         spin_lock(&inode->i_lock);                703         spin_lock(&inode->i_lock);
712         if (!--pipe->files) {                     704         if (!--pipe->files) {
713                 inode->i_pipe = NULL;             705                 inode->i_pipe = NULL;
714                 kill = 1;                         706                 kill = 1;
715         }                                         707         }
716         spin_unlock(&inode->i_lock);              708         spin_unlock(&inode->i_lock);
717                                                   709 
718         if (kill)                                 710         if (kill)
719                 free_pipe_info(pipe);             711                 free_pipe_info(pipe);
720 }                                                 712 }
721                                                   713 
722 static int                                        714 static int
723 pipe_release(struct inode *inode, struct file     715 pipe_release(struct inode *inode, struct file *file)
724 {                                                 716 {
725         struct pipe_inode_info *pipe = file->p    717         struct pipe_inode_info *pipe = file->private_data;
726                                                   718 
727         mutex_lock(&pipe->mutex);              !! 719         __pipe_lock(pipe);
728         if (file->f_mode & FMODE_READ)            720         if (file->f_mode & FMODE_READ)
729                 pipe->readers--;                  721                 pipe->readers--;
730         if (file->f_mode & FMODE_WRITE)           722         if (file->f_mode & FMODE_WRITE)
731                 pipe->writers--;                  723                 pipe->writers--;
732                                                   724 
733         /* Was that the last reader or writer,    725         /* Was that the last reader or writer, but not the other side? */
734         if (!pipe->readers != !pipe->writers)     726         if (!pipe->readers != !pipe->writers) {
735                 wake_up_interruptible_all(&pip    727                 wake_up_interruptible_all(&pipe->rd_wait);
736                 wake_up_interruptible_all(&pip    728                 wake_up_interruptible_all(&pipe->wr_wait);
737                 kill_fasync(&pipe->fasync_read    729                 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
738                 kill_fasync(&pipe->fasync_writ    730                 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
739         }                                         731         }
740         mutex_unlock(&pipe->mutex);            !! 732         __pipe_unlock(pipe);
741                                                   733 
742         put_pipe_info(inode, pipe);               734         put_pipe_info(inode, pipe);
743         return 0;                                 735         return 0;
744 }                                                 736 }
745                                                   737 
746 static int                                        738 static int
747 pipe_fasync(int fd, struct file *filp, int on)    739 pipe_fasync(int fd, struct file *filp, int on)
748 {                                                 740 {
749         struct pipe_inode_info *pipe = filp->p    741         struct pipe_inode_info *pipe = filp->private_data;
750         int retval = 0;                           742         int retval = 0;
751                                                   743 
752         mutex_lock(&pipe->mutex);              !! 744         __pipe_lock(pipe);
753         if (filp->f_mode & FMODE_READ)            745         if (filp->f_mode & FMODE_READ)
754                 retval = fasync_helper(fd, fil    746                 retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
755         if ((filp->f_mode & FMODE_WRITE) && re    747         if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
756                 retval = fasync_helper(fd, fil    748                 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
757                 if (retval < 0 && (filp->f_mod    749                 if (retval < 0 && (filp->f_mode & FMODE_READ))
758                         /* this can happen onl    750                         /* this can happen only if on == T */
759                         fasync_helper(-1, filp    751                         fasync_helper(-1, filp, 0, &pipe->fasync_readers);
760         }                                         752         }
761         mutex_unlock(&pipe->mutex);            !! 753         __pipe_unlock(pipe);
762         return retval;                            754         return retval;
763 }                                                 755 }
764                                                   756 
765 unsigned long account_pipe_buffers(struct user    757 unsigned long account_pipe_buffers(struct user_struct *user,
766                                    unsigned lo    758                                    unsigned long old, unsigned long new)
767 {                                                 759 {
768         return atomic_long_add_return(new - ol    760         return atomic_long_add_return(new - old, &user->pipe_bufs);
769 }                                                 761 }
770                                                   762 
771 bool too_many_pipe_buffers_soft(unsigned long     763 bool too_many_pipe_buffers_soft(unsigned long user_bufs)
772 {                                                 764 {
773         unsigned long soft_limit = READ_ONCE(p    765         unsigned long soft_limit = READ_ONCE(pipe_user_pages_soft);
774                                                   766 
775         return soft_limit && user_bufs > soft_    767         return soft_limit && user_bufs > soft_limit;
776 }                                                 768 }
777                                                   769 
778 bool too_many_pipe_buffers_hard(unsigned long     770 bool too_many_pipe_buffers_hard(unsigned long user_bufs)
779 {                                                 771 {
780         unsigned long hard_limit = READ_ONCE(p    772         unsigned long hard_limit = READ_ONCE(pipe_user_pages_hard);
781                                                   773 
782         return hard_limit && user_bufs > hard_    774         return hard_limit && user_bufs > hard_limit;
783 }                                                 775 }
784                                                   776 
785 bool pipe_is_unprivileged_user(void)              777 bool pipe_is_unprivileged_user(void)
786 {                                                 778 {
787         return !capable(CAP_SYS_RESOURCE) && !    779         return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
788 }                                                 780 }
789                                                   781 
790 struct pipe_inode_info *alloc_pipe_info(void)     782 struct pipe_inode_info *alloc_pipe_info(void)
791 {                                                 783 {
792         struct pipe_inode_info *pipe;             784         struct pipe_inode_info *pipe;
793         unsigned long pipe_bufs = PIPE_DEF_BUF    785         unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
794         struct user_struct *user = get_current    786         struct user_struct *user = get_current_user();
795         unsigned long user_bufs;                  787         unsigned long user_bufs;
796         unsigned int max_size = READ_ONCE(pipe    788         unsigned int max_size = READ_ONCE(pipe_max_size);
797                                                   789 
798         pipe = kzalloc(sizeof(struct pipe_inod    790         pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT);
799         if (pipe == NULL)                         791         if (pipe == NULL)
800                 goto out_free_uid;                792                 goto out_free_uid;
801                                                   793 
802         if (pipe_bufs * PAGE_SIZE > max_size &    794         if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE))
803                 pipe_bufs = max_size >> PAGE_S    795                 pipe_bufs = max_size >> PAGE_SHIFT;
804                                                   796 
805         user_bufs = account_pipe_buffers(user,    797         user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
806                                                   798 
807         if (too_many_pipe_buffers_soft(user_bu    799         if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) {
808                 user_bufs = account_pipe_buffe    800                 user_bufs = account_pipe_buffers(user, pipe_bufs, PIPE_MIN_DEF_BUFFERS);
809                 pipe_bufs = PIPE_MIN_DEF_BUFFE    801                 pipe_bufs = PIPE_MIN_DEF_BUFFERS;
810         }                                         802         }
811                                                   803 
812         if (too_many_pipe_buffers_hard(user_bu    804         if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user())
813                 goto out_revert_acct;             805                 goto out_revert_acct;
814                                                   806 
815         pipe->bufs = kcalloc(pipe_bufs, sizeof    807         pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer),
816                              GFP_KERNEL_ACCOUN    808                              GFP_KERNEL_ACCOUNT);
817                                                   809 
818         if (pipe->bufs) {                         810         if (pipe->bufs) {
819                 init_waitqueue_head(&pipe->rd_    811                 init_waitqueue_head(&pipe->rd_wait);
820                 init_waitqueue_head(&pipe->wr_    812                 init_waitqueue_head(&pipe->wr_wait);
821                 pipe->r_counter = pipe->w_coun    813                 pipe->r_counter = pipe->w_counter = 1;
822                 pipe->max_usage = pipe_bufs;      814                 pipe->max_usage = pipe_bufs;
823                 pipe->ring_size = pipe_bufs;      815                 pipe->ring_size = pipe_bufs;
824                 pipe->nr_accounted = pipe_bufs    816                 pipe->nr_accounted = pipe_bufs;
825                 pipe->user = user;                817                 pipe->user = user;
826                 mutex_init(&pipe->mutex);         818                 mutex_init(&pipe->mutex);
827                 lock_set_cmp_fn(&pipe->mutex,  << 
828                 return pipe;                      819                 return pipe;
829         }                                         820         }
830                                                   821 
831 out_revert_acct:                                  822 out_revert_acct:
832         (void) account_pipe_buffers(user, pipe    823         (void) account_pipe_buffers(user, pipe_bufs, 0);
833         kfree(pipe);                              824         kfree(pipe);
834 out_free_uid:                                     825 out_free_uid:
835         free_uid(user);                           826         free_uid(user);
836         return NULL;                              827         return NULL;
837 }                                                 828 }
838                                                   829 
839 void free_pipe_info(struct pipe_inode_info *pi    830 void free_pipe_info(struct pipe_inode_info *pipe)
840 {                                                 831 {
841         unsigned int i;                           832         unsigned int i;
842                                                   833 
843 #ifdef CONFIG_WATCH_QUEUE                         834 #ifdef CONFIG_WATCH_QUEUE
844         if (pipe->watch_queue)                    835         if (pipe->watch_queue)
845                 watch_queue_clear(pipe->watch_    836                 watch_queue_clear(pipe->watch_queue);
846 #endif                                            837 #endif
847                                                   838 
848         (void) account_pipe_buffers(pipe->user    839         (void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0);
849         free_uid(pipe->user);                     840         free_uid(pipe->user);
850         for (i = 0; i < pipe->ring_size; i++)     841         for (i = 0; i < pipe->ring_size; i++) {
851                 struct pipe_buffer *buf = pipe    842                 struct pipe_buffer *buf = pipe->bufs + i;
852                 if (buf->ops)                     843                 if (buf->ops)
853                         pipe_buf_release(pipe,    844                         pipe_buf_release(pipe, buf);
854         }                                         845         }
855 #ifdef CONFIG_WATCH_QUEUE                         846 #ifdef CONFIG_WATCH_QUEUE
856         if (pipe->watch_queue)                    847         if (pipe->watch_queue)
857                 put_watch_queue(pipe->watch_qu    848                 put_watch_queue(pipe->watch_queue);
858 #endif                                            849 #endif
859         if (pipe->tmp_page)                       850         if (pipe->tmp_page)
860                 __free_page(pipe->tmp_page);      851                 __free_page(pipe->tmp_page);
861         kfree(pipe->bufs);                        852         kfree(pipe->bufs);
862         kfree(pipe);                              853         kfree(pipe);
863 }                                                 854 }
864                                                   855 
865 static struct vfsmount *pipe_mnt __ro_after_in !! 856 static struct vfsmount *pipe_mnt __read_mostly;
866                                                   857 
867 /*                                                858 /*
868  * pipefs_dname() is called from d_path().        859  * pipefs_dname() is called from d_path().
869  */                                               860  */
870 static char *pipefs_dname(struct dentry *dentr    861 static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
871 {                                                 862 {
872         return dynamic_dname(buffer, buflen, " !! 863         return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
873                                 d_inode(dentry    864                                 d_inode(dentry)->i_ino);
874 }                                                 865 }
875                                                   866 
876 static const struct dentry_operations pipefs_d    867 static const struct dentry_operations pipefs_dentry_operations = {
877         .d_dname        = pipefs_dname,           868         .d_dname        = pipefs_dname,
878 };                                                869 };
879                                                   870 
880 static struct inode * get_pipe_inode(void)        871 static struct inode * get_pipe_inode(void)
881 {                                                 872 {
882         struct inode *inode = new_inode_pseudo    873         struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
883         struct pipe_inode_info *pipe;             874         struct pipe_inode_info *pipe;
884                                                   875 
885         if (!inode)                               876         if (!inode)
886                 goto fail_inode;                  877                 goto fail_inode;
887                                                   878 
888         inode->i_ino = get_next_ino();            879         inode->i_ino = get_next_ino();
889                                                   880 
890         pipe = alloc_pipe_info();                 881         pipe = alloc_pipe_info();
891         if (!pipe)                                882         if (!pipe)
892                 goto fail_iput;                   883                 goto fail_iput;
893                                                   884 
894         inode->i_pipe = pipe;                     885         inode->i_pipe = pipe;
895         pipe->files = 2;                          886         pipe->files = 2;
896         pipe->readers = pipe->writers = 1;        887         pipe->readers = pipe->writers = 1;
897         inode->i_fop = &pipefifo_fops;            888         inode->i_fop = &pipefifo_fops;
898                                                   889 
899         /*                                        890         /*
900          * Mark the inode dirty from the very     891          * Mark the inode dirty from the very beginning,
901          * that way it will never be moved to     892          * that way it will never be moved to the dirty
902          * list because "mark_inode_dirty()" w    893          * list because "mark_inode_dirty()" will think
903          * that it already _is_ on the dirty l    894          * that it already _is_ on the dirty list.
904          */                                       895          */
905         inode->i_state = I_DIRTY;                 896         inode->i_state = I_DIRTY;
906         inode->i_mode = S_IFIFO | S_IRUSR | S_    897         inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
907         inode->i_uid = current_fsuid();           898         inode->i_uid = current_fsuid();
908         inode->i_gid = current_fsgid();           899         inode->i_gid = current_fsgid();
909         simple_inode_init_ts(inode);           !! 900         inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
910                                                   901 
911         return inode;                             902         return inode;
912                                                   903 
913 fail_iput:                                        904 fail_iput:
914         iput(inode);                              905         iput(inode);
915                                                   906 
916 fail_inode:                                       907 fail_inode:
917         return NULL;                              908         return NULL;
918 }                                                 909 }
919                                                   910 
920 int create_pipe_files(struct file **res, int f    911 int create_pipe_files(struct file **res, int flags)
921 {                                                 912 {
922         struct inode *inode = get_pipe_inode()    913         struct inode *inode = get_pipe_inode();
923         struct file *f;                           914         struct file *f;
924         int error;                                915         int error;
925                                                   916 
926         if (!inode)                               917         if (!inode)
927                 return -ENFILE;                   918                 return -ENFILE;
928                                                   919 
929         if (flags & O_NOTIFICATION_PIPE) {        920         if (flags & O_NOTIFICATION_PIPE) {
930                 error = watch_queue_init(inode    921                 error = watch_queue_init(inode->i_pipe);
931                 if (error) {                      922                 if (error) {
932                         free_pipe_info(inode->    923                         free_pipe_info(inode->i_pipe);
933                         iput(inode);              924                         iput(inode);
934                         return error;             925                         return error;
935                 }                                 926                 }
936         }                                         927         }
937                                                   928 
938         f = alloc_file_pseudo(inode, pipe_mnt,    929         f = alloc_file_pseudo(inode, pipe_mnt, "",
939                                 O_WRONLY | (fl    930                                 O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
940                                 &pipefifo_fops    931                                 &pipefifo_fops);
941         if (IS_ERR(f)) {                          932         if (IS_ERR(f)) {
942                 free_pipe_info(inode->i_pipe);    933                 free_pipe_info(inode->i_pipe);
943                 iput(inode);                      934                 iput(inode);
944                 return PTR_ERR(f);                935                 return PTR_ERR(f);
945         }                                         936         }
946                                                   937 
947         f->private_data = inode->i_pipe;          938         f->private_data = inode->i_pipe;
948         f->f_pipe = 0;                         << 
949                                                   939 
950         res[0] = alloc_file_clone(f, O_RDONLY     940         res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
951                                   &pipefifo_fo    941                                   &pipefifo_fops);
952         if (IS_ERR(res[0])) {                     942         if (IS_ERR(res[0])) {
953                 put_pipe_info(inode, inode->i_    943                 put_pipe_info(inode, inode->i_pipe);
954                 fput(f);                          944                 fput(f);
955                 return PTR_ERR(res[0]);           945                 return PTR_ERR(res[0]);
956         }                                         946         }
957         res[0]->private_data = inode->i_pipe;     947         res[0]->private_data = inode->i_pipe;
958         res[0]->f_pipe = 0;                    << 
959         res[1] = f;                               948         res[1] = f;
960         stream_open(inode, res[0]);               949         stream_open(inode, res[0]);
961         stream_open(inode, res[1]);               950         stream_open(inode, res[1]);
962         return 0;                                 951         return 0;
963 }                                                 952 }
964                                                   953 
965 static int __do_pipe_flags(int *fd, struct fil    954 static int __do_pipe_flags(int *fd, struct file **files, int flags)
966 {                                                 955 {
967         int error;                                956         int error;
968         int fdw, fdr;                             957         int fdw, fdr;
969                                                   958 
970         if (flags & ~(O_CLOEXEC | O_NONBLOCK |    959         if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT | O_NOTIFICATION_PIPE))
971                 return -EINVAL;                   960                 return -EINVAL;
972                                                   961 
973         error = create_pipe_files(files, flags    962         error = create_pipe_files(files, flags);
974         if (error)                                963         if (error)
975                 return error;                     964                 return error;
976                                                   965 
977         error = get_unused_fd_flags(flags);       966         error = get_unused_fd_flags(flags);
978         if (error < 0)                            967         if (error < 0)
979                 goto err_read_pipe;               968                 goto err_read_pipe;
980         fdr = error;                              969         fdr = error;
981                                                   970 
982         error = get_unused_fd_flags(flags);       971         error = get_unused_fd_flags(flags);
983         if (error < 0)                            972         if (error < 0)
984                 goto err_fdr;                     973                 goto err_fdr;
985         fdw = error;                              974         fdw = error;
986                                                   975 
987         audit_fd_pair(fdr, fdw);                  976         audit_fd_pair(fdr, fdw);
988         fd[0] = fdr;                              977         fd[0] = fdr;
989         fd[1] = fdw;                              978         fd[1] = fdw;
990         /* pipe groks IOCB_NOWAIT */           << 
991         files[0]->f_mode |= FMODE_NOWAIT;      << 
992         files[1]->f_mode |= FMODE_NOWAIT;      << 
993         return 0;                                 979         return 0;
994                                                   980 
995  err_fdr:                                         981  err_fdr:
996         put_unused_fd(fdr);                       982         put_unused_fd(fdr);
997  err_read_pipe:                                   983  err_read_pipe:
998         fput(files[0]);                           984         fput(files[0]);
999         fput(files[1]);                           985         fput(files[1]);
1000         return error;                            986         return error;
1001 }                                                987 }
1002                                                  988 
1003 int do_pipe_flags(int *fd, int flags)            989 int do_pipe_flags(int *fd, int flags)
1004 {                                                990 {
1005         struct file *files[2];                   991         struct file *files[2];
1006         int error = __do_pipe_flags(fd, files    992         int error = __do_pipe_flags(fd, files, flags);
1007         if (!error) {                            993         if (!error) {
1008                 fd_install(fd[0], files[0]);     994                 fd_install(fd[0], files[0]);
1009                 fd_install(fd[1], files[1]);     995                 fd_install(fd[1], files[1]);
1010         }                                        996         }
1011         return error;                            997         return error;
1012 }                                                998 }
1013                                                  999 
1014 /*                                               1000 /*
1015  * sys_pipe() is the normal C calling standar    1001  * sys_pipe() is the normal C calling standard for creating
1016  * a pipe. It's not the way Unix traditionall    1002  * a pipe. It's not the way Unix traditionally does this, though.
1017  */                                              1003  */
1018 static int do_pipe2(int __user *fildes, int f    1004 static int do_pipe2(int __user *fildes, int flags)
1019 {                                                1005 {
1020         struct file *files[2];                   1006         struct file *files[2];
1021         int fd[2];                               1007         int fd[2];
1022         int error;                               1008         int error;
1023                                                  1009 
1024         error = __do_pipe_flags(fd, files, fl    1010         error = __do_pipe_flags(fd, files, flags);
1025         if (!error) {                            1011         if (!error) {
1026                 if (unlikely(copy_to_user(fil    1012                 if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
1027                         fput(files[0]);          1013                         fput(files[0]);
1028                         fput(files[1]);          1014                         fput(files[1]);
1029                         put_unused_fd(fd[0]);    1015                         put_unused_fd(fd[0]);
1030                         put_unused_fd(fd[1]);    1016                         put_unused_fd(fd[1]);
1031                         error = -EFAULT;         1017                         error = -EFAULT;
1032                 } else {                         1018                 } else {
1033                         fd_install(fd[0], fil    1019                         fd_install(fd[0], files[0]);
1034                         fd_install(fd[1], fil    1020                         fd_install(fd[1], files[1]);
1035                 }                                1021                 }
1036         }                                        1022         }
1037         return error;                            1023         return error;
1038 }                                                1024 }
1039                                                  1025 
1040 SYSCALL_DEFINE2(pipe2, int __user *, fildes,     1026 SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
1041 {                                                1027 {
1042         return do_pipe2(fildes, flags);          1028         return do_pipe2(fildes, flags);
1043 }                                                1029 }
1044                                                  1030 
1045 SYSCALL_DEFINE1(pipe, int __user *, fildes)      1031 SYSCALL_DEFINE1(pipe, int __user *, fildes)
1046 {                                                1032 {
1047         return do_pipe2(fildes, 0);              1033         return do_pipe2(fildes, 0);
1048 }                                                1034 }
1049                                                  1035 
1050 /*                                               1036 /*
1051  * This is the stupid "wait for pipe to be re    1037  * This is the stupid "wait for pipe to be readable or writable"
1052  * model.                                        1038  * model.
1053  *                                               1039  *
1054  * See pipe_read/write() for the proper kind     1040  * See pipe_read/write() for the proper kind of exclusive wait,
1055  * but that requires that we wake up any othe    1041  * but that requires that we wake up any other readers/writers
1056  * if we then do not end up reading everythin    1042  * if we then do not end up reading everything (ie the whole
1057  * "wake_next_reader/writer" logic in pipe_re    1043  * "wake_next_reader/writer" logic in pipe_read/write()).
1058  */                                              1044  */
1059 void pipe_wait_readable(struct pipe_inode_inf    1045 void pipe_wait_readable(struct pipe_inode_info *pipe)
1060 {                                                1046 {
1061         pipe_unlock(pipe);                       1047         pipe_unlock(pipe);
1062         wait_event_interruptible(pipe->rd_wai    1048         wait_event_interruptible(pipe->rd_wait, pipe_readable(pipe));
1063         pipe_lock(pipe);                         1049         pipe_lock(pipe);
1064 }                                                1050 }
1065                                                  1051 
1066 void pipe_wait_writable(struct pipe_inode_inf    1052 void pipe_wait_writable(struct pipe_inode_info *pipe)
1067 {                                                1053 {
1068         pipe_unlock(pipe);                       1054         pipe_unlock(pipe);
1069         wait_event_interruptible(pipe->wr_wai    1055         wait_event_interruptible(pipe->wr_wait, pipe_writable(pipe));
1070         pipe_lock(pipe);                         1056         pipe_lock(pipe);
1071 }                                                1057 }
1072                                                  1058 
1073 /*                                               1059 /*
1074  * This depends on both the wait (here) and t    1060  * This depends on both the wait (here) and the wakeup (wake_up_partner)
1075  * holding the pipe lock, so "*cnt" is stable    1061  * holding the pipe lock, so "*cnt" is stable and we know a wakeup cannot
1076  * race with the count check and waitqueue pr    1062  * race with the count check and waitqueue prep.
1077  *                                               1063  *
1078  * Normally in order to avoid races, you'd do    1064  * Normally in order to avoid races, you'd do the prepare_to_wait() first,
1079  * then check the condition you're waiting fo    1065  * then check the condition you're waiting for, and only then sleep. But
1080  * because of the pipe lock, we can check the    1066  * because of the pipe lock, we can check the condition before being on
1081  * the wait queue.                               1067  * the wait queue.
1082  *                                               1068  *
1083  * We use the 'rd_wait' waitqueue for pipe pa    1069  * We use the 'rd_wait' waitqueue for pipe partner waiting.
1084  */                                              1070  */
1085 static int wait_for_partner(struct pipe_inode    1071 static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
1086 {                                                1072 {
1087         DEFINE_WAIT(rdwait);                     1073         DEFINE_WAIT(rdwait);
1088         int cur = *cnt;                          1074         int cur = *cnt;
1089                                                  1075 
1090         while (cur == *cnt) {                    1076         while (cur == *cnt) {
1091                 prepare_to_wait(&pipe->rd_wai    1077                 prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
1092                 pipe_unlock(pipe);               1078                 pipe_unlock(pipe);
1093                 schedule();                      1079                 schedule();
1094                 finish_wait(&pipe->rd_wait, &    1080                 finish_wait(&pipe->rd_wait, &rdwait);
1095                 pipe_lock(pipe);                 1081                 pipe_lock(pipe);
1096                 if (signal_pending(current))     1082                 if (signal_pending(current))
1097                         break;                   1083                         break;
1098         }                                        1084         }
1099         return cur == *cnt ? -ERESTARTSYS : 0    1085         return cur == *cnt ? -ERESTARTSYS : 0;
1100 }                                                1086 }
1101                                                  1087 
1102 static void wake_up_partner(struct pipe_inode    1088 static void wake_up_partner(struct pipe_inode_info *pipe)
1103 {                                                1089 {
1104         wake_up_interruptible_all(&pipe->rd_w    1090         wake_up_interruptible_all(&pipe->rd_wait);
1105 }                                                1091 }
1106                                                  1092 
1107 static int fifo_open(struct inode *inode, str    1093 static int fifo_open(struct inode *inode, struct file *filp)
1108 {                                                1094 {
1109         struct pipe_inode_info *pipe;            1095         struct pipe_inode_info *pipe;
1110         bool is_pipe = inode->i_sb->s_magic =    1096         bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
1111         int ret;                                 1097         int ret;
1112                                                  1098 
1113         filp->f_pipe = 0;                     !! 1099         filp->f_version = 0;
1114                                                  1100 
1115         spin_lock(&inode->i_lock);               1101         spin_lock(&inode->i_lock);
1116         if (inode->i_pipe) {                     1102         if (inode->i_pipe) {
1117                 pipe = inode->i_pipe;            1103                 pipe = inode->i_pipe;
1118                 pipe->files++;                   1104                 pipe->files++;
1119                 spin_unlock(&inode->i_lock);     1105                 spin_unlock(&inode->i_lock);
1120         } else {                                 1106         } else {
1121                 spin_unlock(&inode->i_lock);     1107                 spin_unlock(&inode->i_lock);
1122                 pipe = alloc_pipe_info();        1108                 pipe = alloc_pipe_info();
1123                 if (!pipe)                       1109                 if (!pipe)
1124                         return -ENOMEM;          1110                         return -ENOMEM;
1125                 pipe->files = 1;                 1111                 pipe->files = 1;
1126                 spin_lock(&inode->i_lock);       1112                 spin_lock(&inode->i_lock);
1127                 if (unlikely(inode->i_pipe))     1113                 if (unlikely(inode->i_pipe)) {
1128                         inode->i_pipe->files+    1114                         inode->i_pipe->files++;
1129                         spin_unlock(&inode->i    1115                         spin_unlock(&inode->i_lock);
1130                         free_pipe_info(pipe);    1116                         free_pipe_info(pipe);
1131                         pipe = inode->i_pipe;    1117                         pipe = inode->i_pipe;
1132                 } else {                         1118                 } else {
1133                         inode->i_pipe = pipe;    1119                         inode->i_pipe = pipe;
1134                         spin_unlock(&inode->i    1120                         spin_unlock(&inode->i_lock);
1135                 }                                1121                 }
1136         }                                        1122         }
1137         filp->private_data = pipe;               1123         filp->private_data = pipe;
1138         /* OK, we have a pipe and it's pinned    1124         /* OK, we have a pipe and it's pinned down */
1139                                                  1125 
1140         mutex_lock(&pipe->mutex);             !! 1126         __pipe_lock(pipe);
1141                                                  1127 
1142         /* We can only do regular read/write     1128         /* We can only do regular read/write on fifos */
1143         stream_open(inode, filp);                1129         stream_open(inode, filp);
1144                                                  1130 
1145         switch (filp->f_mode & (FMODE_READ |     1131         switch (filp->f_mode & (FMODE_READ | FMODE_WRITE)) {
1146         case FMODE_READ:                         1132         case FMODE_READ:
1147         /*                                       1133         /*
1148          *  O_RDONLY                             1134          *  O_RDONLY
1149          *  POSIX.1 says that O_NONBLOCK mean    1135          *  POSIX.1 says that O_NONBLOCK means return with the FIFO
1150          *  opened, even when there is no pro    1136          *  opened, even when there is no process writing the FIFO.
1151          */                                      1137          */
1152                 pipe->r_counter++;               1138                 pipe->r_counter++;
1153                 if (pipe->readers++ == 0)        1139                 if (pipe->readers++ == 0)
1154                         wake_up_partner(pipe)    1140                         wake_up_partner(pipe);
1155                                                  1141 
1156                 if (!is_pipe && !pipe->writer    1142                 if (!is_pipe && !pipe->writers) {
1157                         if ((filp->f_flags &     1143                         if ((filp->f_flags & O_NONBLOCK)) {
1158                                 /* suppress E    1144                                 /* suppress EPOLLHUP until we have
1159                                  * seen a wri    1145                                  * seen a writer */
1160                                 filp->f_pipe  !! 1146                                 filp->f_version = pipe->w_counter;
1161                         } else {                 1147                         } else {
1162                                 if (wait_for_    1148                                 if (wait_for_partner(pipe, &pipe->w_counter))
1163                                         goto     1149                                         goto err_rd;
1164                         }                        1150                         }
1165                 }                                1151                 }
1166                 break;                           1152                 break;
1167                                                  1153 
1168         case FMODE_WRITE:                        1154         case FMODE_WRITE:
1169         /*                                       1155         /*
1170          *  O_WRONLY                             1156          *  O_WRONLY
1171          *  POSIX.1 says that O_NONBLOCK mean    1157          *  POSIX.1 says that O_NONBLOCK means return -1 with
1172          *  errno=ENXIO when there is no proc    1158          *  errno=ENXIO when there is no process reading the FIFO.
1173          */                                      1159          */
1174                 ret = -ENXIO;                    1160                 ret = -ENXIO;
1175                 if (!is_pipe && (filp->f_flag    1161                 if (!is_pipe && (filp->f_flags & O_NONBLOCK) && !pipe->readers)
1176                         goto err;                1162                         goto err;
1177                                                  1163 
1178                 pipe->w_counter++;               1164                 pipe->w_counter++;
1179                 if (!pipe->writers++)            1165                 if (!pipe->writers++)
1180                         wake_up_partner(pipe)    1166                         wake_up_partner(pipe);
1181                                                  1167 
1182                 if (!is_pipe && !pipe->reader    1168                 if (!is_pipe && !pipe->readers) {
1183                         if (wait_for_partner(    1169                         if (wait_for_partner(pipe, &pipe->r_counter))
1184                                 goto err_wr;     1170                                 goto err_wr;
1185                 }                                1171                 }
1186                 break;                           1172                 break;
1187                                                  1173 
1188         case FMODE_READ | FMODE_WRITE:           1174         case FMODE_READ | FMODE_WRITE:
1189         /*                                       1175         /*
1190          *  O_RDWR                               1176          *  O_RDWR
1191          *  POSIX.1 leaves this case "undefin    1177          *  POSIX.1 leaves this case "undefined" when O_NONBLOCK is set.
1192          *  This implementation will NEVER bl    1178          *  This implementation will NEVER block on a O_RDWR open, since
1193          *  the process can at least talk to     1179          *  the process can at least talk to itself.
1194          */                                      1180          */
1195                                                  1181 
1196                 pipe->readers++;                 1182                 pipe->readers++;
1197                 pipe->writers++;                 1183                 pipe->writers++;
1198                 pipe->r_counter++;               1184                 pipe->r_counter++;
1199                 pipe->w_counter++;               1185                 pipe->w_counter++;
1200                 if (pipe->readers == 1 || pip    1186                 if (pipe->readers == 1 || pipe->writers == 1)
1201                         wake_up_partner(pipe)    1187                         wake_up_partner(pipe);
1202                 break;                           1188                 break;
1203                                                  1189 
1204         default:                                 1190         default:
1205                 ret = -EINVAL;                   1191                 ret = -EINVAL;
1206                 goto err;                        1192                 goto err;
1207         }                                        1193         }
1208                                                  1194 
1209         /* Ok! */                                1195         /* Ok! */
1210         mutex_unlock(&pipe->mutex);           !! 1196         __pipe_unlock(pipe);
1211         return 0;                                1197         return 0;
1212                                                  1198 
1213 err_rd:                                          1199 err_rd:
1214         if (!--pipe->readers)                    1200         if (!--pipe->readers)
1215                 wake_up_interruptible(&pipe->    1201                 wake_up_interruptible(&pipe->wr_wait);
1216         ret = -ERESTARTSYS;                      1202         ret = -ERESTARTSYS;
1217         goto err;                                1203         goto err;
1218                                                  1204 
1219 err_wr:                                          1205 err_wr:
1220         if (!--pipe->writers)                    1206         if (!--pipe->writers)
1221                 wake_up_interruptible_all(&pi    1207                 wake_up_interruptible_all(&pipe->rd_wait);
1222         ret = -ERESTARTSYS;                      1208         ret = -ERESTARTSYS;
1223         goto err;                                1209         goto err;
1224                                                  1210 
1225 err:                                             1211 err:
1226         mutex_unlock(&pipe->mutex);           !! 1212         __pipe_unlock(pipe);
1227                                                  1213 
1228         put_pipe_info(inode, pipe);              1214         put_pipe_info(inode, pipe);
1229         return ret;                              1215         return ret;
1230 }                                                1216 }
1231                                                  1217 
1232 const struct file_operations pipefifo_fops =     1218 const struct file_operations pipefifo_fops = {
1233         .open           = fifo_open,             1219         .open           = fifo_open,
                                                   >> 1220         .llseek         = no_llseek,
1234         .read_iter      = pipe_read,             1221         .read_iter      = pipe_read,
1235         .write_iter     = pipe_write,            1222         .write_iter     = pipe_write,
1236         .poll           = pipe_poll,             1223         .poll           = pipe_poll,
1237         .unlocked_ioctl = pipe_ioctl,            1224         .unlocked_ioctl = pipe_ioctl,
1238         .release        = pipe_release,          1225         .release        = pipe_release,
1239         .fasync         = pipe_fasync,           1226         .fasync         = pipe_fasync,
1240         .splice_write   = iter_file_splice_wr    1227         .splice_write   = iter_file_splice_write,
1241 };                                               1228 };
1242                                                  1229 
1243 /*                                               1230 /*
1244  * Currently we rely on the pipe array holdin    1231  * Currently we rely on the pipe array holding a power-of-2 number
1245  * of pages. Returns 0 on error.                 1232  * of pages. Returns 0 on error.
1246  */                                              1233  */
1247 unsigned int round_pipe_size(unsigned int siz !! 1234 unsigned int round_pipe_size(unsigned long size)
1248 {                                                1235 {
1249         if (size > (1U << 31))                   1236         if (size > (1U << 31))
1250                 return 0;                        1237                 return 0;
1251                                                  1238 
1252         /* Minimum pipe size, as required by     1239         /* Minimum pipe size, as required by POSIX */
1253         if (size < PAGE_SIZE)                    1240         if (size < PAGE_SIZE)
1254                 return PAGE_SIZE;                1241                 return PAGE_SIZE;
1255                                                  1242 
1256         return roundup_pow_of_two(size);         1243         return roundup_pow_of_two(size);
1257 }                                                1244 }
1258                                                  1245 
1259 /*                                               1246 /*
1260  * Resize the pipe ring to a number of slots.    1247  * Resize the pipe ring to a number of slots.
1261  *                                               1248  *
1262  * Note the pipe can be reduced in capacity,     1249  * Note the pipe can be reduced in capacity, but only if the current
1263  * occupancy doesn't exceed nr_slots; if it d    1250  * occupancy doesn't exceed nr_slots; if it does, EBUSY will be
1264  * returned instead.                             1251  * returned instead.
1265  */                                              1252  */
1266 int pipe_resize_ring(struct pipe_inode_info *    1253 int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
1267 {                                                1254 {
1268         struct pipe_buffer *bufs;                1255         struct pipe_buffer *bufs;
1269         unsigned int head, tail, mask, n;        1256         unsigned int head, tail, mask, n;
1270                                                  1257 
1271         bufs = kcalloc(nr_slots, sizeof(*bufs    1258         bufs = kcalloc(nr_slots, sizeof(*bufs),
1272                        GFP_KERNEL_ACCOUNT | _    1259                        GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
1273         if (unlikely(!bufs))                     1260         if (unlikely(!bufs))
1274                 return -ENOMEM;                  1261                 return -ENOMEM;
1275                                                  1262 
1276         spin_lock_irq(&pipe->rd_wait.lock);      1263         spin_lock_irq(&pipe->rd_wait.lock);
1277         mask = pipe->ring_size - 1;              1264         mask = pipe->ring_size - 1;
1278         head = pipe->head;                       1265         head = pipe->head;
1279         tail = pipe->tail;                       1266         tail = pipe->tail;
1280                                                  1267 
1281         n = pipe_occupancy(head, tail);          1268         n = pipe_occupancy(head, tail);
1282         if (nr_slots < n) {                      1269         if (nr_slots < n) {
1283                 spin_unlock_irq(&pipe->rd_wai    1270                 spin_unlock_irq(&pipe->rd_wait.lock);
1284                 kfree(bufs);                     1271                 kfree(bufs);
1285                 return -EBUSY;                   1272                 return -EBUSY;
1286         }                                        1273         }
1287                                                  1274 
1288         /*                                       1275         /*
1289          * The pipe array wraps around, so ju    1276          * The pipe array wraps around, so just start the new one at zero
1290          * and adjust the indices.               1277          * and adjust the indices.
1291          */                                      1278          */
1292         if (n > 0) {                             1279         if (n > 0) {
1293                 unsigned int h = head & mask;    1280                 unsigned int h = head & mask;
1294                 unsigned int t = tail & mask;    1281                 unsigned int t = tail & mask;
1295                 if (h > t) {                     1282                 if (h > t) {
1296                         memcpy(bufs, pipe->bu    1283                         memcpy(bufs, pipe->bufs + t,
1297                                n * sizeof(str    1284                                n * sizeof(struct pipe_buffer));
1298                 } else {                         1285                 } else {
1299                         unsigned int tsize =     1286                         unsigned int tsize = pipe->ring_size - t;
1300                         if (h > 0)               1287                         if (h > 0)
1301                                 memcpy(bufs +    1288                                 memcpy(bufs + tsize, pipe->bufs,
1302                                        h * si    1289                                        h * sizeof(struct pipe_buffer));
1303                         memcpy(bufs, pipe->bu    1290                         memcpy(bufs, pipe->bufs + t,
1304                                tsize * sizeof    1291                                tsize * sizeof(struct pipe_buffer));
1305                 }                                1292                 }
1306         }                                        1293         }
1307                                                  1294 
1308         head = n;                                1295         head = n;
1309         tail = 0;                                1296         tail = 0;
1310                                                  1297 
1311         kfree(pipe->bufs);                       1298         kfree(pipe->bufs);
1312         pipe->bufs = bufs;                       1299         pipe->bufs = bufs;
1313         pipe->ring_size = nr_slots;              1300         pipe->ring_size = nr_slots;
1314         if (pipe->max_usage > nr_slots)          1301         if (pipe->max_usage > nr_slots)
1315                 pipe->max_usage = nr_slots;      1302                 pipe->max_usage = nr_slots;
1316         pipe->tail = tail;                       1303         pipe->tail = tail;
1317         pipe->head = head;                       1304         pipe->head = head;
1318                                                  1305 
1319         if (!pipe_has_watch_queue(pipe)) {    << 
1320                 pipe->max_usage = nr_slots;   << 
1321                 pipe->nr_accounted = nr_slots << 
1322         }                                     << 
1323                                               << 
1324         spin_unlock_irq(&pipe->rd_wait.lock);    1306         spin_unlock_irq(&pipe->rd_wait.lock);
1325                                                  1307 
1326         /* This might have made more room for    1308         /* This might have made more room for writers */
1327         wake_up_interruptible(&pipe->wr_wait)    1309         wake_up_interruptible(&pipe->wr_wait);
1328         return 0;                                1310         return 0;
1329 }                                                1311 }
1330                                                  1312 
1331 /*                                               1313 /*
1332  * Allocate a new array of pipe buffers and c    1314  * Allocate a new array of pipe buffers and copy the info over. Returns the
1333  * pipe size if successful, or return -ERROR     1315  * pipe size if successful, or return -ERROR on error.
1334  */                                              1316  */
1335 static long pipe_set_size(struct pipe_inode_i !! 1317 static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
1336 {                                                1318 {
1337         unsigned long user_bufs;                 1319         unsigned long user_bufs;
1338         unsigned int nr_slots, size;             1320         unsigned int nr_slots, size;
1339         long ret = 0;                            1321         long ret = 0;
1340                                                  1322 
1341         if (pipe_has_watch_queue(pipe))       !! 1323 #ifdef CONFIG_WATCH_QUEUE
                                                   >> 1324         if (pipe->watch_queue)
1342                 return -EBUSY;                   1325                 return -EBUSY;
                                                   >> 1326 #endif
1343                                                  1327 
1344         size = round_pipe_size(arg);             1328         size = round_pipe_size(arg);
1345         nr_slots = size >> PAGE_SHIFT;           1329         nr_slots = size >> PAGE_SHIFT;
1346                                                  1330 
1347         if (!nr_slots)                           1331         if (!nr_slots)
1348                 return -EINVAL;                  1332                 return -EINVAL;
1349                                                  1333 
1350         /*                                       1334         /*
1351          * If trying to increase the pipe cap    1335          * If trying to increase the pipe capacity, check that an
1352          * unprivileged user is not trying to    1336          * unprivileged user is not trying to exceed various limits
1353          * (soft limit check here, hard limit    1337          * (soft limit check here, hard limit check just below).
1354          * Decreasing the pipe capacity is al    1338          * Decreasing the pipe capacity is always permitted, even
1355          * if the user is currently over a li    1339          * if the user is currently over a limit.
1356          */                                      1340          */
1357         if (nr_slots > pipe->max_usage &&        1341         if (nr_slots > pipe->max_usage &&
1358                         size > pipe_max_size     1342                         size > pipe_max_size && !capable(CAP_SYS_RESOURCE))
1359                 return -EPERM;                   1343                 return -EPERM;
1360                                                  1344 
1361         user_bufs = account_pipe_buffers(pipe    1345         user_bufs = account_pipe_buffers(pipe->user, pipe->nr_accounted, nr_slots);
1362                                                  1346 
1363         if (nr_slots > pipe->max_usage &&        1347         if (nr_slots > pipe->max_usage &&
1364                         (too_many_pipe_buffer    1348                         (too_many_pipe_buffers_hard(user_bufs) ||
1365                          too_many_pipe_buffer    1349                          too_many_pipe_buffers_soft(user_bufs)) &&
1366                         pipe_is_unprivileged_    1350                         pipe_is_unprivileged_user()) {
1367                 ret = -EPERM;                    1351                 ret = -EPERM;
1368                 goto out_revert_acct;            1352                 goto out_revert_acct;
1369         }                                        1353         }
1370                                                  1354 
1371         ret = pipe_resize_ring(pipe, nr_slots    1355         ret = pipe_resize_ring(pipe, nr_slots);
1372         if (ret < 0)                             1356         if (ret < 0)
1373                 goto out_revert_acct;            1357                 goto out_revert_acct;
1374                                                  1358 
                                                   >> 1359         pipe->max_usage = nr_slots;
                                                   >> 1360         pipe->nr_accounted = nr_slots;
1375         return pipe->max_usage * PAGE_SIZE;      1361         return pipe->max_usage * PAGE_SIZE;
1376                                                  1362 
1377 out_revert_acct:                                 1363 out_revert_acct:
1378         (void) account_pipe_buffers(pipe->use    1364         (void) account_pipe_buffers(pipe->user, nr_slots, pipe->nr_accounted);
1379         return ret;                              1365         return ret;
1380 }                                                1366 }
1381                                                  1367 
1382 /*                                               1368 /*
1383  * Note that i_pipe and i_cdev share the same    1369  * Note that i_pipe and i_cdev share the same location, so checking ->i_pipe is
1384  * not enough to verify that this is a pipe.     1370  * not enough to verify that this is a pipe.
1385  */                                              1371  */
1386 struct pipe_inode_info *get_pipe_info(struct     1372 struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
1387 {                                                1373 {
1388         struct pipe_inode_info *pipe = file->    1374         struct pipe_inode_info *pipe = file->private_data;
1389                                                  1375 
1390         if (file->f_op != &pipefifo_fops || !    1376         if (file->f_op != &pipefifo_fops || !pipe)
1391                 return NULL;                     1377                 return NULL;
1392         if (for_splice && pipe_has_watch_queu !! 1378 #ifdef CONFIG_WATCH_QUEUE
                                                   >> 1379         if (for_splice && pipe->watch_queue)
1393                 return NULL;                     1380                 return NULL;
                                                   >> 1381 #endif
1394         return pipe;                             1382         return pipe;
1395 }                                                1383 }
1396                                                  1384 
1397 long pipe_fcntl(struct file *file, unsigned i !! 1385 long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
1398 {                                                1386 {
1399         struct pipe_inode_info *pipe;            1387         struct pipe_inode_info *pipe;
1400         long ret;                                1388         long ret;
1401                                                  1389 
1402         pipe = get_pipe_info(file, false);       1390         pipe = get_pipe_info(file, false);
1403         if (!pipe)                               1391         if (!pipe)
1404                 return -EBADF;                   1392                 return -EBADF;
1405                                                  1393 
1406         mutex_lock(&pipe->mutex);             !! 1394         __pipe_lock(pipe);
1407                                                  1395 
1408         switch (cmd) {                           1396         switch (cmd) {
1409         case F_SETPIPE_SZ:                       1397         case F_SETPIPE_SZ:
1410                 ret = pipe_set_size(pipe, arg    1398                 ret = pipe_set_size(pipe, arg);
1411                 break;                           1399                 break;
1412         case F_GETPIPE_SZ:                       1400         case F_GETPIPE_SZ:
1413                 ret = pipe->max_usage * PAGE_    1401                 ret = pipe->max_usage * PAGE_SIZE;
1414                 break;                           1402                 break;
1415         default:                                 1403         default:
1416                 ret = -EINVAL;                   1404                 ret = -EINVAL;
1417                 break;                           1405                 break;
1418         }                                        1406         }
1419                                                  1407 
1420         mutex_unlock(&pipe->mutex);           !! 1408         __pipe_unlock(pipe);
1421         return ret;                              1409         return ret;
1422 }                                                1410 }
1423                                                  1411 
1424 static const struct super_operations pipefs_o    1412 static const struct super_operations pipefs_ops = {
1425         .destroy_inode = free_inode_nonrcu,      1413         .destroy_inode = free_inode_nonrcu,
1426         .statfs = simple_statfs,                 1414         .statfs = simple_statfs,
1427 };                                               1415 };
1428                                                  1416 
1429 /*                                               1417 /*
1430  * pipefs should _never_ be mounted by userla    1418  * pipefs should _never_ be mounted by userland - too much of security hassle,
1431  * no real gain from having the whole file sy !! 1419  * no real gain from having the whole whorehouse mounted. So we don't need
1432  * any operations on the root directory. Howe    1420  * any operations on the root directory. However, we need a non-trivial
1433  * d_name - pipe: will go nicely and kill the    1421  * d_name - pipe: will go nicely and kill the special-casing in procfs.
1434  */                                              1422  */
1435                                                  1423 
1436 static int pipefs_init_fs_context(struct fs_c    1424 static int pipefs_init_fs_context(struct fs_context *fc)
1437 {                                                1425 {
1438         struct pseudo_fs_context *ctx = init_    1426         struct pseudo_fs_context *ctx = init_pseudo(fc, PIPEFS_MAGIC);
1439         if (!ctx)                                1427         if (!ctx)
1440                 return -ENOMEM;                  1428                 return -ENOMEM;
1441         ctx->ops = &pipefs_ops;                  1429         ctx->ops = &pipefs_ops;
1442         ctx->dops = &pipefs_dentry_operations    1430         ctx->dops = &pipefs_dentry_operations;
1443         return 0;                                1431         return 0;
1444 }                                                1432 }
1445                                                  1433 
1446 static struct file_system_type pipe_fs_type =    1434 static struct file_system_type pipe_fs_type = {
1447         .name           = "pipefs",              1435         .name           = "pipefs",
1448         .init_fs_context = pipefs_init_fs_con    1436         .init_fs_context = pipefs_init_fs_context,
1449         .kill_sb        = kill_anon_super,       1437         .kill_sb        = kill_anon_super,
1450 };                                               1438 };
1451                                                  1439 
1452 #ifdef CONFIG_SYSCTL                             1440 #ifdef CONFIG_SYSCTL
1453 static int do_proc_dopipe_max_size_conv(unsig    1441 static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
1454                                         unsig    1442                                         unsigned int *valp,
1455                                         int w    1443                                         int write, void *data)
1456 {                                                1444 {
1457         if (write) {                             1445         if (write) {
1458                 unsigned int val;                1446                 unsigned int val;
1459                                                  1447 
1460                 val = round_pipe_size(*lvalp)    1448                 val = round_pipe_size(*lvalp);
1461                 if (val == 0)                    1449                 if (val == 0)
1462                         return -EINVAL;          1450                         return -EINVAL;
1463                                                  1451 
1464                 *valp = val;                     1452                 *valp = val;
1465         } else {                                 1453         } else {
1466                 unsigned int val = *valp;        1454                 unsigned int val = *valp;
1467                 *lvalp = (unsigned long) val;    1455                 *lvalp = (unsigned long) val;
1468         }                                        1456         }
1469                                                  1457 
1470         return 0;                                1458         return 0;
1471 }                                                1459 }
1472                                                  1460 
1473 static int proc_dopipe_max_size(const struct  !! 1461 static int proc_dopipe_max_size(struct ctl_table *table, int write,
1474                                 void *buffer,    1462                                 void *buffer, size_t *lenp, loff_t *ppos)
1475 {                                                1463 {
1476         return do_proc_douintvec(table, write    1464         return do_proc_douintvec(table, write, buffer, lenp, ppos,
1477                                  do_proc_dopi    1465                                  do_proc_dopipe_max_size_conv, NULL);
1478 }                                                1466 }
1479                                                  1467 
1480 static struct ctl_table fs_pipe_sysctls[] = {    1468 static struct ctl_table fs_pipe_sysctls[] = {
1481         {                                        1469         {
1482                 .procname       = "pipe-max-s    1470                 .procname       = "pipe-max-size",
1483                 .data           = &pipe_max_s    1471                 .data           = &pipe_max_size,
1484                 .maxlen         = sizeof(pipe    1472                 .maxlen         = sizeof(pipe_max_size),
1485                 .mode           = 0644,          1473                 .mode           = 0644,
1486                 .proc_handler   = proc_dopipe    1474                 .proc_handler   = proc_dopipe_max_size,
1487         },                                       1475         },
1488         {                                        1476         {
1489                 .procname       = "pipe-user-    1477                 .procname       = "pipe-user-pages-hard",
1490                 .data           = &pipe_user_    1478                 .data           = &pipe_user_pages_hard,
1491                 .maxlen         = sizeof(pipe    1479                 .maxlen         = sizeof(pipe_user_pages_hard),
1492                 .mode           = 0644,          1480                 .mode           = 0644,
1493                 .proc_handler   = proc_doulon    1481                 .proc_handler   = proc_doulongvec_minmax,
1494         },                                       1482         },
1495         {                                        1483         {
1496                 .procname       = "pipe-user-    1484                 .procname       = "pipe-user-pages-soft",
1497                 .data           = &pipe_user_    1485                 .data           = &pipe_user_pages_soft,
1498                 .maxlen         = sizeof(pipe    1486                 .maxlen         = sizeof(pipe_user_pages_soft),
1499                 .mode           = 0644,          1487                 .mode           = 0644,
1500                 .proc_handler   = proc_doulon    1488                 .proc_handler   = proc_doulongvec_minmax,
1501         },                                       1489         },
                                                   >> 1490         { }
1502 };                                               1491 };
1503 #endif                                           1492 #endif
1504                                                  1493 
1505 static int __init init_pipe_fs(void)             1494 static int __init init_pipe_fs(void)
1506 {                                                1495 {
1507         int err = register_filesystem(&pipe_f    1496         int err = register_filesystem(&pipe_fs_type);
1508                                                  1497 
1509         if (!err) {                              1498         if (!err) {
1510                 pipe_mnt = kern_mount(&pipe_f    1499                 pipe_mnt = kern_mount(&pipe_fs_type);
1511                 if (IS_ERR(pipe_mnt)) {          1500                 if (IS_ERR(pipe_mnt)) {
1512                         err = PTR_ERR(pipe_mn    1501                         err = PTR_ERR(pipe_mnt);
1513                         unregister_filesystem    1502                         unregister_filesystem(&pipe_fs_type);
1514                 }                                1503                 }
1515         }                                        1504         }
1516 #ifdef CONFIG_SYSCTL                             1505 #ifdef CONFIG_SYSCTL
1517         register_sysctl_init("fs", fs_pipe_sy    1506         register_sysctl_init("fs", fs_pipe_sysctls);
1518 #endif                                           1507 #endif
1519         return err;                              1508         return err;
1520 }                                                1509 }
1521                                                  1510 
1522 fs_initcall(init_pipe_fs);                       1511 fs_initcall(init_pipe_fs);
1523                                                  1512 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php