~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/fs/pipe.c

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /fs/pipe.c (Version linux-6.12-rc7) and /fs/pipe.c (Version linux-5.6.19)


  1 // SPDX-License-Identifier: GPL-2.0                 1 // SPDX-License-Identifier: GPL-2.0
  2 /*                                                  2 /*
  3  *  linux/fs/pipe.c                                 3  *  linux/fs/pipe.c
  4  *                                                  4  *
  5  *  Copyright (C) 1991, 1992, 1999  Linus Torv      5  *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
  6  */                                                 6  */
  7                                                     7 
  8 #include <linux/mm.h>                               8 #include <linux/mm.h>
  9 #include <linux/file.h>                             9 #include <linux/file.h>
 10 #include <linux/poll.h>                            10 #include <linux/poll.h>
 11 #include <linux/slab.h>                            11 #include <linux/slab.h>
 12 #include <linux/module.h>                          12 #include <linux/module.h>
 13 #include <linux/init.h>                            13 #include <linux/init.h>
 14 #include <linux/fs.h>                              14 #include <linux/fs.h>
 15 #include <linux/log2.h>                            15 #include <linux/log2.h>
 16 #include <linux/mount.h>                           16 #include <linux/mount.h>
 17 #include <linux/pseudo_fs.h>                       17 #include <linux/pseudo_fs.h>
 18 #include <linux/magic.h>                           18 #include <linux/magic.h>
 19 #include <linux/pipe_fs_i.h>                       19 #include <linux/pipe_fs_i.h>
 20 #include <linux/uio.h>                             20 #include <linux/uio.h>
 21 #include <linux/highmem.h>                         21 #include <linux/highmem.h>
 22 #include <linux/pagemap.h>                         22 #include <linux/pagemap.h>
 23 #include <linux/audit.h>                           23 #include <linux/audit.h>
 24 #include <linux/syscalls.h>                        24 #include <linux/syscalls.h>
 25 #include <linux/fcntl.h>                           25 #include <linux/fcntl.h>
 26 #include <linux/memcontrol.h>                      26 #include <linux/memcontrol.h>
 27 #include <linux/watch_queue.h>                 << 
 28 #include <linux/sysctl.h>                      << 
 29                                                    27 
 30 #include <linux/uaccess.h>                         28 #include <linux/uaccess.h>
 31 #include <asm/ioctls.h>                            29 #include <asm/ioctls.h>
 32                                                    30 
 33 #include "internal.h"                              31 #include "internal.h"
 34                                                    32 
 35 /*                                                 33 /*
 36  * New pipe buffers will be restricted to this << 
 37  * their pipe buffer quota. The general pipe u << 
 38  * buffers: one for data yet to be read, and o << 
 39  * than two, then a write to a non-empty pipe  << 
 40  * full. This can occur with GNU make jobserve << 
 41  * semaphores: multiple processes may be waiti << 
 42  * pipe before reading tokens: https://lore.ke << 
 43  *                                             << 
 44  * Users can reduce their pipe buffers with F_ << 
 45  * own risk, namely: pipe writes to non-full p << 
 46  * emptied.                                    << 
 47  */                                            << 
 48 #define PIPE_MIN_DEF_BUFFERS 2                 << 
 49                                                << 
 50 /*                                             << 
 51  * The max size that a non-root user is allowe     34  * The max size that a non-root user is allowed to grow the pipe. Can
 52  * be set by root in /proc/sys/fs/pipe-max-siz     35  * be set by root in /proc/sys/fs/pipe-max-size
 53  */                                                36  */
 54 static unsigned int pipe_max_size = 1048576;   !!  37 unsigned int pipe_max_size = 1048576;
 55                                                    38 
 56 /* Maximum allocatable pages per user. Hard li     39 /* Maximum allocatable pages per user. Hard limit is unset by default, soft
 57  * matches default values.                         40  * matches default values.
 58  */                                                41  */
 59 static unsigned long pipe_user_pages_hard;     !!  42 unsigned long pipe_user_pages_hard;
 60 static unsigned long pipe_user_pages_soft = PI !!  43 unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
 61                                                    44 
 62 /*                                                 45 /*
 63  * We use head and tail indices that aren't ma     46  * We use head and tail indices that aren't masked off, except at the point of
 64  * dereference, but rather they're allowed to      47  * dereference, but rather they're allowed to wrap naturally.  This means there
 65  * isn't a dead spot in the buffer, but the ri     48  * isn't a dead spot in the buffer, but the ring has to be a power of two and
 66  * <= 2^31.                                        49  * <= 2^31.
 67  * -- David Howells 2019-09-23.                    50  * -- David Howells 2019-09-23.
 68  *                                                 51  *
 69  * Reads with count = 0 should always return 0     52  * Reads with count = 0 should always return 0.
 70  * -- Julian Bradfield 1999-06-07.                 53  * -- Julian Bradfield 1999-06-07.
 71  *                                                 54  *
 72  * FIFOs and Pipes now generate SIGIO for both     55  * FIFOs and Pipes now generate SIGIO for both readers and writers.
 73  * -- Jeremy Elson <jelson@circlemud.org> 2001     56  * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
 74  *                                                 57  *
 75  * pipe_read & write cleanup                       58  * pipe_read & write cleanup
 76  * -- Manfred Spraul <manfred@colorfullife.com     59  * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
 77  */                                                60  */
 78                                                    61 
 79 #define cmp_int(l, r)           ((l > r) - (l  !!  62 static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
 80                                                << 
 81 #ifdef CONFIG_PROVE_LOCKING                    << 
 82 static int pipe_lock_cmp_fn(const struct lockd << 
 83                             const struct lockd << 
 84 {                                                  63 {
 85         return cmp_int((unsigned long) a, (uns !!  64         if (pipe->files)
                                                   >>  65                 mutex_lock_nested(&pipe->mutex, subclass);
 86 }                                                  66 }
 87 #endif                                         << 
 88                                                    67 
 89 void pipe_lock(struct pipe_inode_info *pipe)       68 void pipe_lock(struct pipe_inode_info *pipe)
 90 {                                                  69 {
 91         if (pipe->files)                       !!  70         /*
 92                 mutex_lock(&pipe->mutex);      !!  71          * pipe_lock() nests non-pipe inode locks (for writing to a file)
                                                   >>  72          */
                                                   >>  73         pipe_lock_nested(pipe, I_MUTEX_PARENT);
 93 }                                                  74 }
 94 EXPORT_SYMBOL(pipe_lock);                          75 EXPORT_SYMBOL(pipe_lock);
 95                                                    76 
 96 void pipe_unlock(struct pipe_inode_info *pipe)     77 void pipe_unlock(struct pipe_inode_info *pipe)
 97 {                                                  78 {
 98         if (pipe->files)                           79         if (pipe->files)
 99                 mutex_unlock(&pipe->mutex);        80                 mutex_unlock(&pipe->mutex);
100 }                                                  81 }
101 EXPORT_SYMBOL(pipe_unlock);                        82 EXPORT_SYMBOL(pipe_unlock);
102                                                    83 
                                                   >>  84 static inline void __pipe_lock(struct pipe_inode_info *pipe)
                                                   >>  85 {
                                                   >>  86         mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
                                                   >>  87 }
                                                   >>  88 
                                                   >>  89 static inline void __pipe_unlock(struct pipe_inode_info *pipe)
                                                   >>  90 {
                                                   >>  91         mutex_unlock(&pipe->mutex);
                                                   >>  92 }
                                                   >>  93 
103 void pipe_double_lock(struct pipe_inode_info *     94 void pipe_double_lock(struct pipe_inode_info *pipe1,
104                       struct pipe_inode_info *     95                       struct pipe_inode_info *pipe2)
105 {                                                  96 {
106         BUG_ON(pipe1 == pipe2);                    97         BUG_ON(pipe1 == pipe2);
107                                                    98 
108         if (pipe1 > pipe2)                     !!  99         if (pipe1 < pipe2) {
109                 swap(pipe1, pipe2);            !! 100                 pipe_lock_nested(pipe1, I_MUTEX_PARENT);
                                                   >> 101                 pipe_lock_nested(pipe2, I_MUTEX_CHILD);
                                                   >> 102         } else {
                                                   >> 103                 pipe_lock_nested(pipe2, I_MUTEX_PARENT);
                                                   >> 104                 pipe_lock_nested(pipe1, I_MUTEX_CHILD);
                                                   >> 105         }
                                                   >> 106 }
                                                   >> 107 
                                                   >> 108 /* Drop the inode semaphore and wait for a pipe event, atomically */
                                                   >> 109 void pipe_wait(struct pipe_inode_info *pipe)
                                                   >> 110 {
                                                   >> 111         DEFINE_WAIT(rdwait);
                                                   >> 112         DEFINE_WAIT(wrwait);
110                                                   113 
111         pipe_lock(pipe1);                      !! 114         /*
112         pipe_lock(pipe2);                      !! 115          * Pipes are system-local resources, so sleeping on them
                                                   >> 116          * is considered a noninteractive wait:
                                                   >> 117          */
                                                   >> 118         prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
                                                   >> 119         prepare_to_wait(&pipe->wr_wait, &wrwait, TASK_INTERRUPTIBLE);
                                                   >> 120         pipe_unlock(pipe);
                                                   >> 121         schedule();
                                                   >> 122         finish_wait(&pipe->rd_wait, &rdwait);
                                                   >> 123         finish_wait(&pipe->wr_wait, &wrwait);
                                                   >> 124         pipe_lock(pipe);
113 }                                                 125 }
114                                                   126 
115 static void anon_pipe_buf_release(struct pipe_    127 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
116                                   struct pipe_    128                                   struct pipe_buffer *buf)
117 {                                                 129 {
118         struct page *page = buf->page;            130         struct page *page = buf->page;
119                                                   131 
120         /*                                        132         /*
121          * If nobody else uses this page, and     133          * If nobody else uses this page, and we don't already have a
122          * temporary page, let's keep track of    134          * temporary page, let's keep track of it as a one-deep
123          * allocation cache. (Otherwise just r    135          * allocation cache. (Otherwise just release our reference to it)
124          */                                       136          */
125         if (page_count(page) == 1 && !pipe->tm    137         if (page_count(page) == 1 && !pipe->tmp_page)
126                 pipe->tmp_page = page;            138                 pipe->tmp_page = page;
127         else                                      139         else
128                 put_page(page);                   140                 put_page(page);
129 }                                                 141 }
130                                                   142 
131 static bool anon_pipe_buf_try_steal(struct pip !! 143 static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
132                 struct pipe_buffer *buf)       !! 144                                struct pipe_buffer *buf)
133 {                                                 145 {
134         struct page *page = buf->page;            146         struct page *page = buf->page;
135                                                   147 
136         if (page_count(page) != 1)             !! 148         if (page_count(page) == 1) {
137                 return false;                  !! 149                 memcg_kmem_uncharge(page, 0);
138         memcg_kmem_uncharge_page(page, 0);     !! 150                 __SetPageLocked(page);
139         __SetPageLocked(page);                 !! 151                 return 0;
140         return true;                           !! 152         }
                                                   >> 153         return 1;
141 }                                                 154 }
142                                                   155 
143 /**                                               156 /**
144  * generic_pipe_buf_try_steal - attempt to tak !! 157  * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer
145  * @pipe:       the pipe that the buffer belon    158  * @pipe:       the pipe that the buffer belongs to
146  * @buf:        the buffer to attempt to steal    159  * @buf:        the buffer to attempt to steal
147  *                                                160  *
148  * Description:                                   161  * Description:
149  *      This function attempts to steal the &s    162  *      This function attempts to steal the &struct page attached to
150  *      @buf. If successful, this function ret    163  *      @buf. If successful, this function returns 0 and returns with
151  *      the page locked. The caller may then r    164  *      the page locked. The caller may then reuse the page for whatever
152  *      he wishes; the typical use is insertio    165  *      he wishes; the typical use is insertion into a different file
153  *      page cache.                               166  *      page cache.
154  */                                               167  */
155 bool generic_pipe_buf_try_steal(struct pipe_in !! 168 int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
156                 struct pipe_buffer *buf)       !! 169                            struct pipe_buffer *buf)
157 {                                                 170 {
158         struct page *page = buf->page;            171         struct page *page = buf->page;
159                                                   172 
160         /*                                        173         /*
161          * A reference of one is golden, that     174          * A reference of one is golden, that means that the owner of this
162          * page is the only one holding a refe    175          * page is the only one holding a reference to it. lock the page
163          * and return OK.                         176          * and return OK.
164          */                                       177          */
165         if (page_count(page) == 1) {              178         if (page_count(page) == 1) {
166                 lock_page(page);                  179                 lock_page(page);
167                 return true;                   !! 180                 return 0;
168         }                                         181         }
169         return false;                          !! 182 
                                                   >> 183         return 1;
170 }                                                 184 }
171 EXPORT_SYMBOL(generic_pipe_buf_try_steal);     !! 185 EXPORT_SYMBOL(generic_pipe_buf_steal);
172                                                   186 
173 /**                                               187 /**
174  * generic_pipe_buf_get - get a reference to a    188  * generic_pipe_buf_get - get a reference to a &struct pipe_buffer
175  * @pipe:       the pipe that the buffer belon    189  * @pipe:       the pipe that the buffer belongs to
176  * @buf:        the buffer to get a reference     190  * @buf:        the buffer to get a reference to
177  *                                                191  *
178  * Description:                                   192  * Description:
179  *      This function grabs an extra reference    193  *      This function grabs an extra reference to @buf. It's used in
180  *      the tee() system call, when we duplica !! 194  *      in the tee() system call, when we duplicate the buffers in one
181  *      pipe into another.                        195  *      pipe into another.
182  */                                               196  */
183 bool generic_pipe_buf_get(struct pipe_inode_in    197 bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
184 {                                                 198 {
185         return try_get_page(buf->page);           199         return try_get_page(buf->page);
186 }                                                 200 }
187 EXPORT_SYMBOL(generic_pipe_buf_get);              201 EXPORT_SYMBOL(generic_pipe_buf_get);
188                                                   202 
189 /**                                               203 /**
                                                   >> 204  * generic_pipe_buf_confirm - verify contents of the pipe buffer
                                                   >> 205  * @info:       the pipe that the buffer belongs to
                                                   >> 206  * @buf:        the buffer to confirm
                                                   >> 207  *
                                                   >> 208  * Description:
                                                   >> 209  *      This function does nothing, because the generic pipe code uses
                                                   >> 210  *      pages that are always good when inserted into the pipe.
                                                   >> 211  */
                                                   >> 212 int generic_pipe_buf_confirm(struct pipe_inode_info *info,
                                                   >> 213                              struct pipe_buffer *buf)
                                                   >> 214 {
                                                   >> 215         return 0;
                                                   >> 216 }
                                                   >> 217 EXPORT_SYMBOL(generic_pipe_buf_confirm);
                                                   >> 218 
                                                   >> 219 /**
190  * generic_pipe_buf_release - put a reference     220  * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
191  * @pipe:       the pipe that the buffer belon    221  * @pipe:       the pipe that the buffer belongs to
192  * @buf:        the buffer to put a reference     222  * @buf:        the buffer to put a reference to
193  *                                                223  *
194  * Description:                                   224  * Description:
195  *      This function releases a reference to     225  *      This function releases a reference to @buf.
196  */                                               226  */
197 void generic_pipe_buf_release(struct pipe_inod    227 void generic_pipe_buf_release(struct pipe_inode_info *pipe,
198                               struct pipe_buff    228                               struct pipe_buffer *buf)
199 {                                                 229 {
200         put_page(buf->page);                      230         put_page(buf->page);
201 }                                                 231 }
202 EXPORT_SYMBOL(generic_pipe_buf_release);          232 EXPORT_SYMBOL(generic_pipe_buf_release);
203                                                   233 
                                                   >> 234 /* New data written to a pipe may be appended to a buffer with this type. */
204 static const struct pipe_buf_operations anon_p    235 static const struct pipe_buf_operations anon_pipe_buf_ops = {
205         .release        = anon_pipe_buf_releas !! 236         .confirm = generic_pipe_buf_confirm,
206         .try_steal      = anon_pipe_buf_try_st !! 237         .release = anon_pipe_buf_release,
207         .get            = generic_pipe_buf_get !! 238         .steal = anon_pipe_buf_steal,
                                                   >> 239         .get = generic_pipe_buf_get,
                                                   >> 240 };
                                                   >> 241 
                                                   >> 242 static const struct pipe_buf_operations anon_pipe_buf_nomerge_ops = {
                                                   >> 243         .confirm = generic_pipe_buf_confirm,
                                                   >> 244         .release = anon_pipe_buf_release,
                                                   >> 245         .steal = anon_pipe_buf_steal,
                                                   >> 246         .get = generic_pipe_buf_get,
208 };                                                247 };
209                                                   248 
                                                   >> 249 static const struct pipe_buf_operations packet_pipe_buf_ops = {
                                                   >> 250         .confirm = generic_pipe_buf_confirm,
                                                   >> 251         .release = anon_pipe_buf_release,
                                                   >> 252         .steal = anon_pipe_buf_steal,
                                                   >> 253         .get = generic_pipe_buf_get,
                                                   >> 254 };
                                                   >> 255 
                                                   >> 256 /**
                                                   >> 257  * pipe_buf_mark_unmergeable - mark a &struct pipe_buffer as unmergeable
                                                   >> 258  * @buf:        the buffer to mark
                                                   >> 259  *
                                                   >> 260  * Description:
                                                   >> 261  *      This function ensures that no future writes will be merged into the
                                                   >> 262  *      given &struct pipe_buffer. This is necessary when multiple pipe buffers
                                                   >> 263  *      share the same backing page.
                                                   >> 264  */
                                                   >> 265 void pipe_buf_mark_unmergeable(struct pipe_buffer *buf)
                                                   >> 266 {
                                                   >> 267         if (buf->ops == &anon_pipe_buf_ops)
                                                   >> 268                 buf->ops = &anon_pipe_buf_nomerge_ops;
                                                   >> 269 }
                                                   >> 270 
                                                   >> 271 static bool pipe_buf_can_merge(struct pipe_buffer *buf)
                                                   >> 272 {
                                                   >> 273         return buf->ops == &anon_pipe_buf_ops;
                                                   >> 274 }
                                                   >> 275 
210 /* Done while waiting without holding the pipe    276 /* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
211 static inline bool pipe_readable(const struct     277 static inline bool pipe_readable(const struct pipe_inode_info *pipe)
212 {                                                 278 {
213         unsigned int head = READ_ONCE(pipe->he    279         unsigned int head = READ_ONCE(pipe->head);
214         unsigned int tail = READ_ONCE(pipe->ta    280         unsigned int tail = READ_ONCE(pipe->tail);
215         unsigned int writers = READ_ONCE(pipe-    281         unsigned int writers = READ_ONCE(pipe->writers);
216                                                   282 
217         return !pipe_empty(head, tail) || !wri    283         return !pipe_empty(head, tail) || !writers;
218 }                                                 284 }
219                                                   285 
220 static inline unsigned int pipe_update_tail(st << 
221                                             st << 
222                                             un << 
223 {                                              << 
224         pipe_buf_release(pipe, buf);           << 
225                                                << 
226         /*                                     << 
227          * If the pipe has a watch_queue, we n << 
228          * by the spinlock because notificatio << 
229          * this spinlock, no mutex             << 
230          */                                    << 
231         if (pipe_has_watch_queue(pipe)) {      << 
232                 spin_lock_irq(&pipe->rd_wait.l << 
233 #ifdef CONFIG_WATCH_QUEUE                      << 
234                 if (buf->flags & PIPE_BUF_FLAG << 
235                         pipe->note_loss = true << 
236 #endif                                         << 
237                 pipe->tail = ++tail;           << 
238                 spin_unlock_irq(&pipe->rd_wait << 
239                 return tail;                   << 
240         }                                      << 
241                                                << 
242         /*                                     << 
243          * Without a watch_queue, we can simpl << 
244          * without the spinlock - the mutex is << 
245          */                                    << 
246         pipe->tail = ++tail;                   << 
247         return tail;                           << 
248 }                                              << 
249                                                << 
250 static ssize_t                                    286 static ssize_t
251 pipe_read(struct kiocb *iocb, struct iov_iter     287 pipe_read(struct kiocb *iocb, struct iov_iter *to)
252 {                                                 288 {
253         size_t total_len = iov_iter_count(to);    289         size_t total_len = iov_iter_count(to);
254         struct file *filp = iocb->ki_filp;        290         struct file *filp = iocb->ki_filp;
255         struct pipe_inode_info *pipe = filp->p    291         struct pipe_inode_info *pipe = filp->private_data;
256         bool was_full, wake_next_reader = fals    292         bool was_full, wake_next_reader = false;
257         ssize_t ret;                              293         ssize_t ret;
258                                                   294 
259         /* Null read succeeds. */                 295         /* Null read succeeds. */
260         if (unlikely(total_len == 0))             296         if (unlikely(total_len == 0))
261                 return 0;                         297                 return 0;
262                                                   298 
263         ret = 0;                                  299         ret = 0;
264         mutex_lock(&pipe->mutex);              !! 300         __pipe_lock(pipe);
265                                                   301 
266         /*                                        302         /*
267          * We only wake up writers if the pipe    303          * We only wake up writers if the pipe was full when we started
268          * reading in order to avoid unnecessa    304          * reading in order to avoid unnecessary wakeups.
269          *                                        305          *
270          * But when we do wake up writers, we     306          * But when we do wake up writers, we do so using a sync wakeup
271          * (WF_SYNC), because we want them to     307          * (WF_SYNC), because we want them to get going and generate more
272          * data for us.                           308          * data for us.
273          */                                       309          */
274         was_full = pipe_full(pipe->head, pipe-    310         was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
275         for (;;) {                                311         for (;;) {
276                 /* Read ->head with a barrier  !! 312                 unsigned int head = pipe->head;
277                 unsigned int head = smp_load_a << 
278                 unsigned int tail = pipe->tail    313                 unsigned int tail = pipe->tail;
279                 unsigned int mask = pipe->ring    314                 unsigned int mask = pipe->ring_size - 1;
280                                                   315 
281 #ifdef CONFIG_WATCH_QUEUE                      << 
282                 if (pipe->note_loss) {         << 
283                         struct watch_notificat << 
284                                                << 
285                         if (total_len < 8) {   << 
286                                 if (ret == 0)  << 
287                                         ret =  << 
288                                 break;         << 
289                         }                      << 
290                                                << 
291                         n.type = WATCH_TYPE_ME << 
292                         n.subtype = WATCH_META << 
293                         n.info = watch_sizeof( << 
294                         if (copy_to_iter(&n, s << 
295                                 if (ret == 0)  << 
296                                         ret =  << 
297                                 break;         << 
298                         }                      << 
299                         ret += sizeof(n);      << 
300                         total_len -= sizeof(n) << 
301                         pipe->note_loss = fals << 
302                 }                              << 
303 #endif                                         << 
304                                                << 
305                 if (!pipe_empty(head, tail)) {    316                 if (!pipe_empty(head, tail)) {
306                         struct pipe_buffer *bu    317                         struct pipe_buffer *buf = &pipe->bufs[tail & mask];
307                         size_t chars = buf->le    318                         size_t chars = buf->len;
308                         size_t written;           319                         size_t written;
309                         int error;                320                         int error;
310                                                   321 
311                         if (chars > total_len) !! 322                         if (chars > total_len)
312                                 if (buf->flags << 
313                                         if (re << 
314                                                << 
315                                         break; << 
316                                 }              << 
317                                 chars = total_    323                                 chars = total_len;
318                         }                      << 
319                                                   324 
320                         error = pipe_buf_confi    325                         error = pipe_buf_confirm(pipe, buf);
321                         if (error) {              326                         if (error) {
322                                 if (!ret)         327                                 if (!ret)
323                                         ret =     328                                         ret = error;
324                                 break;            329                                 break;
325                         }                         330                         }
326                                                   331 
327                         written = copy_page_to    332                         written = copy_page_to_iter(buf->page, buf->offset, chars, to);
328                         if (unlikely(written <    333                         if (unlikely(written < chars)) {
329                                 if (!ret)         334                                 if (!ret)
330                                         ret =     335                                         ret = -EFAULT;
331                                 break;            336                                 break;
332                         }                         337                         }
333                         ret += chars;             338                         ret += chars;
334                         buf->offset += chars;     339                         buf->offset += chars;
335                         buf->len -= chars;        340                         buf->len -= chars;
336                                                   341 
337                         /* Was it a packet buf    342                         /* Was it a packet buffer? Clean up and exit */
338                         if (buf->flags & PIPE_    343                         if (buf->flags & PIPE_BUF_FLAG_PACKET) {
339                                 total_len = ch    344                                 total_len = chars;
340                                 buf->len = 0;     345                                 buf->len = 0;
341                         }                         346                         }
342                                                   347 
343                         if (!buf->len)         !! 348                         if (!buf->len) {
344                                 tail = pipe_up !! 349                                 pipe_buf_release(pipe, buf);
                                                   >> 350                                 spin_lock_irq(&pipe->rd_wait.lock);
                                                   >> 351                                 tail++;
                                                   >> 352                                 pipe->tail = tail;
                                                   >> 353                                 spin_unlock_irq(&pipe->rd_wait.lock);
                                                   >> 354                         }
345                         total_len -= chars;       355                         total_len -= chars;
346                         if (!total_len)           356                         if (!total_len)
347                                 break;  /* com    357                                 break;  /* common path: read succeeded */
348                         if (!pipe_empty(head,     358                         if (!pipe_empty(head, tail))    /* More to do? */
349                                 continue;         359                                 continue;
350                 }                                 360                 }
351                                                   361 
352                 if (!pipe->writers)               362                 if (!pipe->writers)
353                         break;                    363                         break;
354                 if (ret)                          364                 if (ret)
355                         break;                    365                         break;
356                 if ((filp->f_flags & O_NONBLOC !! 366                 if (filp->f_flags & O_NONBLOCK) {
357                     (iocb->ki_flags & IOCB_NOW << 
358                         ret = -EAGAIN;            367                         ret = -EAGAIN;
359                         break;                    368                         break;
360                 }                                 369                 }
361                 mutex_unlock(&pipe->mutex);    !! 370                 __pipe_unlock(pipe);
362                                                   371 
363                 /*                                372                 /*
364                  * We only get here if we didn    373                  * We only get here if we didn't actually read anything.
365                  *                                374                  *
366                  * However, we could have seen    375                  * However, we could have seen (and removed) a zero-sized
367                  * pipe buffer, and might have    376                  * pipe buffer, and might have made space in the buffers
368                  * that way.                      377                  * that way.
369                  *                                378                  *
370                  * You can't make zero-sized p    379                  * You can't make zero-sized pipe buffers by doing an empty
371                  * write (not even in packet m    380                  * write (not even in packet mode), but they can happen if
372                  * the writer gets an EFAULT w    381                  * the writer gets an EFAULT when trying to fill a buffer
373                  * that already got allocated     382                  * that already got allocated and inserted in the buffer
374                  * array.                         383                  * array.
375                  *                                384                  *
376                  * So we still need to wake up    385                  * So we still need to wake up any pending writers in the
377                  * _very_ unlikely case that t    386                  * _very_ unlikely case that the pipe was full, but we got
378                  * no data.                       387                  * no data.
379                  */                               388                  */
380                 if (unlikely(was_full))        !! 389                 if (unlikely(was_full)) {
381                         wake_up_interruptible_    390                         wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
382                 kill_fasync(&pipe->fasync_writ !! 391                         kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
                                                   >> 392                 }
383                                                   393 
384                 /*                                394                 /*
385                  * But because we didn't read     395                  * But because we didn't read anything, at this point we can
386                  * just return directly with -    396                  * just return directly with -ERESTARTSYS if we're interrupted,
387                  * since we've done any requir    397                  * since we've done any required wakeups and there's no need
388                  * to mark anything accessed.     398                  * to mark anything accessed. And we've dropped the lock.
389                  */                               399                  */
390                 if (wait_event_interruptible_e    400                 if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
391                         return -ERESTARTSYS;      401                         return -ERESTARTSYS;
392                                                   402 
393                 mutex_lock(&pipe->mutex);      !! 403                 __pipe_lock(pipe);
394                 was_full = pipe_full(pipe->hea    404                 was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
395                 wake_next_reader = true;          405                 wake_next_reader = true;
396         }                                         406         }
397         if (pipe_empty(pipe->head, pipe->tail)    407         if (pipe_empty(pipe->head, pipe->tail))
398                 wake_next_reader = false;         408                 wake_next_reader = false;
399         mutex_unlock(&pipe->mutex);            !! 409         __pipe_unlock(pipe);
400                                                   410 
401         if (was_full)                          !! 411         if (was_full) {
402                 wake_up_interruptible_sync_pol    412                 wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
                                                   >> 413                 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
                                                   >> 414         }
403         if (wake_next_reader)                     415         if (wake_next_reader)
404                 wake_up_interruptible_sync_pol    416                 wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
405         kill_fasync(&pipe->fasync_writers, SIG << 
406         if (ret > 0)                              417         if (ret > 0)
407                 file_accessed(filp);              418                 file_accessed(filp);
408         return ret;                               419         return ret;
409 }                                                 420 }
410                                                   421 
411 static inline int is_packetized(struct file *f    422 static inline int is_packetized(struct file *file)
412 {                                                 423 {
413         return (file->f_flags & O_DIRECT) != 0    424         return (file->f_flags & O_DIRECT) != 0;
414 }                                                 425 }
415                                                   426 
416 /* Done while waiting without holding the pipe    427 /* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
417 static inline bool pipe_writable(const struct     428 static inline bool pipe_writable(const struct pipe_inode_info *pipe)
418 {                                                 429 {
419         unsigned int head = READ_ONCE(pipe->he    430         unsigned int head = READ_ONCE(pipe->head);
420         unsigned int tail = READ_ONCE(pipe->ta    431         unsigned int tail = READ_ONCE(pipe->tail);
421         unsigned int max_usage = READ_ONCE(pip    432         unsigned int max_usage = READ_ONCE(pipe->max_usage);
422                                                   433 
423         return !pipe_full(head, tail, max_usag    434         return !pipe_full(head, tail, max_usage) ||
424                 !READ_ONCE(pipe->readers);        435                 !READ_ONCE(pipe->readers);
425 }                                                 436 }
426                                                   437 
427 static ssize_t                                    438 static ssize_t
428 pipe_write(struct kiocb *iocb, struct iov_iter    439 pipe_write(struct kiocb *iocb, struct iov_iter *from)
429 {                                                 440 {
430         struct file *filp = iocb->ki_filp;        441         struct file *filp = iocb->ki_filp;
431         struct pipe_inode_info *pipe = filp->p    442         struct pipe_inode_info *pipe = filp->private_data;
432         unsigned int head;                        443         unsigned int head;
433         ssize_t ret = 0;                          444         ssize_t ret = 0;
434         size_t total_len = iov_iter_count(from    445         size_t total_len = iov_iter_count(from);
435         ssize_t chars;                            446         ssize_t chars;
436         bool was_empty = false;                   447         bool was_empty = false;
437         bool wake_next_writer = false;            448         bool wake_next_writer = false;
438                                                   449 
439         /*                                     << 
440          * Reject writing to watch queue pipes << 
441          * the pipe.                           << 
442          * Otherwise, lockdep would be unhappy << 
443          * pipe locked.                        << 
444          * If we had to support locking a norm << 
445          * the same time, we could set up lock << 
446          * since we don't actually need that,  << 
447          */                                    << 
448         if (pipe_has_watch_queue(pipe))        << 
449                 return -EXDEV;                 << 
450                                                << 
451         /* Null write succeeds. */                450         /* Null write succeeds. */
452         if (unlikely(total_len == 0))             451         if (unlikely(total_len == 0))
453                 return 0;                         452                 return 0;
454                                                   453 
455         mutex_lock(&pipe->mutex);              !! 454         __pipe_lock(pipe);
456                                                   455 
457         if (!pipe->readers) {                     456         if (!pipe->readers) {
458                 send_sig(SIGPIPE, current, 0);    457                 send_sig(SIGPIPE, current, 0);
459                 ret = -EPIPE;                     458                 ret = -EPIPE;
460                 goto out;                         459                 goto out;
461         }                                         460         }
462                                                   461 
463         /*                                        462         /*
                                                   >> 463          * Only wake up if the pipe started out empty, since
                                                   >> 464          * otherwise there should be no readers waiting.
                                                   >> 465          *
464          * If it wasn't empty we try to merge     466          * If it wasn't empty we try to merge new data into
465          * the last buffer.                       467          * the last buffer.
466          *                                        468          *
467          * That naturally merges small writes,    469          * That naturally merges small writes, but it also
468          * page-aligns the rest of the writes  !! 470          * page-aligs the rest of the writes for large writes
469          * spanning multiple pages.               471          * spanning multiple pages.
470          */                                       472          */
471         head = pipe->head;                        473         head = pipe->head;
472         was_empty = pipe_empty(head, pipe->tai    474         was_empty = pipe_empty(head, pipe->tail);
473         chars = total_len & (PAGE_SIZE-1);        475         chars = total_len & (PAGE_SIZE-1);
474         if (chars && !was_empty) {                476         if (chars && !was_empty) {
475                 unsigned int mask = pipe->ring    477                 unsigned int mask = pipe->ring_size - 1;
476                 struct pipe_buffer *buf = &pip    478                 struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
477                 int offset = buf->offset + buf    479                 int offset = buf->offset + buf->len;
478                                                   480 
479                 if ((buf->flags & PIPE_BUF_FLA !! 481                 if (pipe_buf_can_merge(buf) && offset + chars <= PAGE_SIZE) {
480                     offset + chars <= PAGE_SIZ << 
481                         ret = pipe_buf_confirm    482                         ret = pipe_buf_confirm(pipe, buf);
482                         if (ret)                  483                         if (ret)
483                                 goto out;         484                                 goto out;
484                                                   485 
485                         ret = copy_page_from_i    486                         ret = copy_page_from_iter(buf->page, offset, chars, from);
486                         if (unlikely(ret < cha    487                         if (unlikely(ret < chars)) {
487                                 ret = -EFAULT;    488                                 ret = -EFAULT;
488                                 goto out;         489                                 goto out;
489                         }                         490                         }
490                                                   491 
491                         buf->len += ret;          492                         buf->len += ret;
492                         if (!iov_iter_count(fr    493                         if (!iov_iter_count(from))
493                                 goto out;         494                                 goto out;
494                 }                                 495                 }
495         }                                         496         }
496                                                   497 
497         for (;;) {                                498         for (;;) {
498                 if (!pipe->readers) {             499                 if (!pipe->readers) {
499                         send_sig(SIGPIPE, curr    500                         send_sig(SIGPIPE, current, 0);
500                         if (!ret)                 501                         if (!ret)
501                                 ret = -EPIPE;     502                                 ret = -EPIPE;
502                         break;                    503                         break;
503                 }                                 504                 }
504                                                   505 
505                 head = pipe->head;                506                 head = pipe->head;
506                 if (!pipe_full(head, pipe->tai    507                 if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
507                         unsigned int mask = pi    508                         unsigned int mask = pipe->ring_size - 1;
508                         struct pipe_buffer *bu !! 509                         struct pipe_buffer *buf = &pipe->bufs[head & mask];
509                         struct page *page = pi    510                         struct page *page = pipe->tmp_page;
510                         int copied;               511                         int copied;
511                                                   512 
512                         if (!page) {              513                         if (!page) {
513                                 page = alloc_p    514                                 page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
514                                 if (unlikely(!    515                                 if (unlikely(!page)) {
515                                         ret =     516                                         ret = ret ? : -ENOMEM;
516                                         break;    517                                         break;
517                                 }                 518                                 }
518                                 pipe->tmp_page    519                                 pipe->tmp_page = page;
519                         }                         520                         }
520                                                   521 
521                         /* Allocate a slot in     522                         /* Allocate a slot in the ring in advance and attach an
522                          * empty buffer.  If w    523                          * empty buffer.  If we fault or otherwise fail to use
523                          * it, either the read    524                          * it, either the reader will consume it or it'll still
524                          * be there for the ne    525                          * be there for the next write.
525                          */                       526                          */
                                                   >> 527                         spin_lock_irq(&pipe->rd_wait.lock);
                                                   >> 528 
                                                   >> 529                         head = pipe->head;
                                                   >> 530                         if (pipe_full(head, pipe->tail, pipe->max_usage)) {
                                                   >> 531                                 spin_unlock_irq(&pipe->rd_wait.lock);
                                                   >> 532                                 continue;
                                                   >> 533                         }
                                                   >> 534 
526                         pipe->head = head + 1;    535                         pipe->head = head + 1;
                                                   >> 536                         spin_unlock_irq(&pipe->rd_wait.lock);
527                                                   537 
528                         /* Insert it into the     538                         /* Insert it into the buffer array */
529                         buf = &pipe->bufs[head    539                         buf = &pipe->bufs[head & mask];
530                         buf->page = page;         540                         buf->page = page;
531                         buf->ops = &anon_pipe_    541                         buf->ops = &anon_pipe_buf_ops;
532                         buf->offset = 0;          542                         buf->offset = 0;
533                         buf->len = 0;             543                         buf->len = 0;
534                         if (is_packetized(filp !! 544                         buf->flags = 0;
                                                   >> 545                         if (is_packetized(filp)) {
                                                   >> 546                                 buf->ops = &packet_pipe_buf_ops;
535                                 buf->flags = P    547                                 buf->flags = PIPE_BUF_FLAG_PACKET;
536                         else                   !! 548                         }
537                                 buf->flags = P << 
538                         pipe->tmp_page = NULL;    549                         pipe->tmp_page = NULL;
539                                                   550 
540                         copied = copy_page_fro    551                         copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
541                         if (unlikely(copied <     552                         if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
542                                 if (!ret)         553                                 if (!ret)
543                                         ret =     554                                         ret = -EFAULT;
544                                 break;            555                                 break;
545                         }                         556                         }
546                         ret += copied;            557                         ret += copied;
                                                   >> 558                         buf->offset = 0;
547                         buf->len = copied;        559                         buf->len = copied;
548                                                   560 
549                         if (!iov_iter_count(fr    561                         if (!iov_iter_count(from))
550                                 break;            562                                 break;
551                 }                                 563                 }
552                                                   564 
553                 if (!pipe_full(head, pipe->tai    565                 if (!pipe_full(head, pipe->tail, pipe->max_usage))
554                         continue;                 566                         continue;
555                                                   567 
556                 /* Wait for buffer space to be    568                 /* Wait for buffer space to become available. */
557                 if ((filp->f_flags & O_NONBLOC !! 569                 if (filp->f_flags & O_NONBLOCK) {
558                     (iocb->ki_flags & IOCB_NOW << 
559                         if (!ret)                 570                         if (!ret)
560                                 ret = -EAGAIN;    571                                 ret = -EAGAIN;
561                         break;                    572                         break;
562                 }                                 573                 }
563                 if (signal_pending(current)) {    574                 if (signal_pending(current)) {
564                         if (!ret)                 575                         if (!ret)
565                                 ret = -ERESTAR    576                                 ret = -ERESTARTSYS;
566                         break;                    577                         break;
567                 }                                 578                 }
568                                                   579 
569                 /*                                580                 /*
570                  * We're going to release the     581                  * We're going to release the pipe lock and wait for more
571                  * space. We wake up any reade    582                  * space. We wake up any readers if necessary, and then
572                  * after waiting we need to re    583                  * after waiting we need to re-check whether the pipe
573                  * become empty while we dropp    584                  * become empty while we dropped the lock.
574                  */                               585                  */
575                 mutex_unlock(&pipe->mutex);    !! 586                 __pipe_unlock(pipe);
576                 if (was_empty)                 !! 587                 if (was_empty) {
577                         wake_up_interruptible_    588                         wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
578                 kill_fasync(&pipe->fasync_read !! 589                         kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
                                                   >> 590                 }
579                 wait_event_interruptible_exclu    591                 wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
580                 mutex_lock(&pipe->mutex);      !! 592                 __pipe_lock(pipe);
581                 was_empty = pipe_empty(pipe->h    593                 was_empty = pipe_empty(pipe->head, pipe->tail);
582                 wake_next_writer = true;          594                 wake_next_writer = true;
583         }                                         595         }
584 out:                                              596 out:
585         if (pipe_full(pipe->head, pipe->tail,     597         if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
586                 wake_next_writer = false;         598                 wake_next_writer = false;
587         mutex_unlock(&pipe->mutex);            !! 599         __pipe_unlock(pipe);
588                                                   600 
589         /*                                        601         /*
590          * If we do do a wakeup event, we do a    602          * If we do do a wakeup event, we do a 'sync' wakeup, because we
591          * want the reader to start processing    603          * want the reader to start processing things asap, rather than
592          * leave the data pending.                604          * leave the data pending.
593          *                                        605          *
594          * This is particularly important for     606          * This is particularly important for small writes, because of
595          * how (for example) the GNU make jobs    607          * how (for example) the GNU make jobserver uses small writes to
596          * wake up pending jobs                   608          * wake up pending jobs
597          *                                     << 
598          * Epoll nonsensically wants a wakeup  << 
599          * was already empty or not.           << 
600          */                                       609          */
601         if (was_empty || pipe->poll_usage)     !! 610         if (was_empty) {
602                 wake_up_interruptible_sync_pol    611                 wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
603         kill_fasync(&pipe->fasync_readers, SIG !! 612                 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
                                                   >> 613         }
604         if (wake_next_writer)                     614         if (wake_next_writer)
605                 wake_up_interruptible_sync_pol    615                 wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
606         if (ret > 0 && sb_start_write_trylock(    616         if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
607                 int err = file_update_time(fil    617                 int err = file_update_time(filp);
608                 if (err)                          618                 if (err)
609                         ret = err;                619                         ret = err;
610                 sb_end_write(file_inode(filp)-    620                 sb_end_write(file_inode(filp)->i_sb);
611         }                                         621         }
612         return ret;                               622         return ret;
613 }                                                 623 }
614                                                   624 
615 static long pipe_ioctl(struct file *filp, unsi    625 static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
616 {                                                 626 {
617         struct pipe_inode_info *pipe = filp->p    627         struct pipe_inode_info *pipe = filp->private_data;
618         unsigned int count, head, tail, mask;  !! 628         int count, head, tail, mask;
619                                                   629 
620         switch (cmd) {                            630         switch (cmd) {
621         case FIONREAD:                         !! 631                 case FIONREAD:
622                 mutex_lock(&pipe->mutex);      !! 632                         __pipe_lock(pipe);
623                 count = 0;                     !! 633                         count = 0;
624                 head = pipe->head;             !! 634                         head = pipe->head;
625                 tail = pipe->tail;             !! 635                         tail = pipe->tail;
626                 mask = pipe->ring_size - 1;    !! 636                         mask = pipe->ring_size - 1;
627                                                !! 637 
628                 while (tail != head) {         !! 638                         while (tail != head) {
629                         count += pipe->bufs[ta !! 639                                 count += pipe->bufs[tail & mask].len;
630                         tail++;                !! 640                                 tail++;
631                 }                              !! 641                         }
632                 mutex_unlock(&pipe->mutex);    !! 642                         __pipe_unlock(pipe);
633                                                << 
634                 return put_user(count, (int __ << 
635                                                << 
636 #ifdef CONFIG_WATCH_QUEUE                      << 
637         case IOC_WATCH_QUEUE_SET_SIZE: {       << 
638                 int ret;                       << 
639                 mutex_lock(&pipe->mutex);      << 
640                 ret = watch_queue_set_size(pip << 
641                 mutex_unlock(&pipe->mutex);    << 
642                 return ret;                    << 
643         }                                      << 
644                                                << 
645         case IOC_WATCH_QUEUE_SET_FILTER:       << 
646                 return watch_queue_set_filter( << 
647                         pipe, (struct watch_no << 
648 #endif                                         << 
649                                                   643 
650         default:                               !! 644                         return put_user(count, (int __user *)arg);
651                 return -ENOIOCTLCMD;           !! 645                 default:
                                                   >> 646                         return -ENOIOCTLCMD;
652         }                                         647         }
653 }                                                 648 }
654                                                   649 
655 /* No kernel lock held - fine */                  650 /* No kernel lock held - fine */
656 static __poll_t                                   651 static __poll_t
657 pipe_poll(struct file *filp, poll_table *wait)    652 pipe_poll(struct file *filp, poll_table *wait)
658 {                                                 653 {
659         __poll_t mask;                            654         __poll_t mask;
660         struct pipe_inode_info *pipe = filp->p    655         struct pipe_inode_info *pipe = filp->private_data;
661         unsigned int head, tail;                  656         unsigned int head, tail;
662                                                   657 
663         /* Epoll has some historical nasty sem << 
664         WRITE_ONCE(pipe->poll_usage, true);    << 
665                                                << 
666         /*                                        658         /*
667          * Reading pipe state only -- no need     659          * Reading pipe state only -- no need for acquiring the semaphore.
668          *                                        660          *
669          * But because this is racy, the code     661          * But because this is racy, the code has to add the
670          * entry to the poll table _first_ ..     662          * entry to the poll table _first_ ..
671          */                                       663          */
672         if (filp->f_mode & FMODE_READ)            664         if (filp->f_mode & FMODE_READ)
673                 poll_wait(filp, &pipe->rd_wait    665                 poll_wait(filp, &pipe->rd_wait, wait);
674         if (filp->f_mode & FMODE_WRITE)           666         if (filp->f_mode & FMODE_WRITE)
675                 poll_wait(filp, &pipe->wr_wait    667                 poll_wait(filp, &pipe->wr_wait, wait);
676                                                   668 
677         /*                                        669         /*
678          * .. and only then can you do the rac    670          * .. and only then can you do the racy tests. That way,
679          * if something changes and you got it    671          * if something changes and you got it wrong, the poll
680          * table entry will wake you up and fi    672          * table entry will wake you up and fix it.
681          */                                       673          */
682         head = READ_ONCE(pipe->head);             674         head = READ_ONCE(pipe->head);
683         tail = READ_ONCE(pipe->tail);             675         tail = READ_ONCE(pipe->tail);
684                                                   676 
685         mask = 0;                                 677         mask = 0;
686         if (filp->f_mode & FMODE_READ) {          678         if (filp->f_mode & FMODE_READ) {
687                 if (!pipe_empty(head, tail))      679                 if (!pipe_empty(head, tail))
688                         mask |= EPOLLIN | EPOL    680                         mask |= EPOLLIN | EPOLLRDNORM;
689                 if (!pipe->writers && filp->f_ !! 681                 if (!pipe->writers && filp->f_version != pipe->w_counter)
690                         mask |= EPOLLHUP;         682                         mask |= EPOLLHUP;
691         }                                         683         }
692                                                   684 
693         if (filp->f_mode & FMODE_WRITE) {         685         if (filp->f_mode & FMODE_WRITE) {
694                 if (!pipe_full(head, tail, pip    686                 if (!pipe_full(head, tail, pipe->max_usage))
695                         mask |= EPOLLOUT | EPO    687                         mask |= EPOLLOUT | EPOLLWRNORM;
696                 /*                                688                 /*
697                  * Most Unices do not set EPOL    689                  * Most Unices do not set EPOLLERR for FIFOs but on Linux they
698                  * behave exactly like pipes f    690                  * behave exactly like pipes for poll().
699                  */                               691                  */
700                 if (!pipe->readers)               692                 if (!pipe->readers)
701                         mask |= EPOLLERR;         693                         mask |= EPOLLERR;
702         }                                         694         }
703                                                   695 
704         return mask;                              696         return mask;
705 }                                                 697 }
706                                                   698 
707 static void put_pipe_info(struct inode *inode,    699 static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
708 {                                                 700 {
709         int kill = 0;                             701         int kill = 0;
710                                                   702 
711         spin_lock(&inode->i_lock);                703         spin_lock(&inode->i_lock);
712         if (!--pipe->files) {                     704         if (!--pipe->files) {
713                 inode->i_pipe = NULL;             705                 inode->i_pipe = NULL;
714                 kill = 1;                         706                 kill = 1;
715         }                                         707         }
716         spin_unlock(&inode->i_lock);              708         spin_unlock(&inode->i_lock);
717                                                   709 
718         if (kill)                                 710         if (kill)
719                 free_pipe_info(pipe);             711                 free_pipe_info(pipe);
720 }                                                 712 }
721                                                   713 
722 static int                                        714 static int
723 pipe_release(struct inode *inode, struct file     715 pipe_release(struct inode *inode, struct file *file)
724 {                                                 716 {
725         struct pipe_inode_info *pipe = file->p    717         struct pipe_inode_info *pipe = file->private_data;
726                                                   718 
727         mutex_lock(&pipe->mutex);              !! 719         __pipe_lock(pipe);
728         if (file->f_mode & FMODE_READ)            720         if (file->f_mode & FMODE_READ)
729                 pipe->readers--;                  721                 pipe->readers--;
730         if (file->f_mode & FMODE_WRITE)           722         if (file->f_mode & FMODE_WRITE)
731                 pipe->writers--;                  723                 pipe->writers--;
732                                                   724 
733         /* Was that the last reader or writer,    725         /* Was that the last reader or writer, but not the other side? */
734         if (!pipe->readers != !pipe->writers)     726         if (!pipe->readers != !pipe->writers) {
735                 wake_up_interruptible_all(&pip    727                 wake_up_interruptible_all(&pipe->rd_wait);
736                 wake_up_interruptible_all(&pip    728                 wake_up_interruptible_all(&pipe->wr_wait);
737                 kill_fasync(&pipe->fasync_read    729                 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
738                 kill_fasync(&pipe->fasync_writ    730                 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
739         }                                         731         }
740         mutex_unlock(&pipe->mutex);            !! 732         __pipe_unlock(pipe);
741                                                   733 
742         put_pipe_info(inode, pipe);               734         put_pipe_info(inode, pipe);
743         return 0;                                 735         return 0;
744 }                                                 736 }
745                                                   737 
746 static int                                        738 static int
747 pipe_fasync(int fd, struct file *filp, int on)    739 pipe_fasync(int fd, struct file *filp, int on)
748 {                                                 740 {
749         struct pipe_inode_info *pipe = filp->p    741         struct pipe_inode_info *pipe = filp->private_data;
750         int retval = 0;                           742         int retval = 0;
751                                                   743 
752         mutex_lock(&pipe->mutex);              !! 744         __pipe_lock(pipe);
753         if (filp->f_mode & FMODE_READ)            745         if (filp->f_mode & FMODE_READ)
754                 retval = fasync_helper(fd, fil    746                 retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
755         if ((filp->f_mode & FMODE_WRITE) && re    747         if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
756                 retval = fasync_helper(fd, fil    748                 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
757                 if (retval < 0 && (filp->f_mod    749                 if (retval < 0 && (filp->f_mode & FMODE_READ))
758                         /* this can happen onl    750                         /* this can happen only if on == T */
759                         fasync_helper(-1, filp    751                         fasync_helper(-1, filp, 0, &pipe->fasync_readers);
760         }                                         752         }
761         mutex_unlock(&pipe->mutex);            !! 753         __pipe_unlock(pipe);
762         return retval;                            754         return retval;
763 }                                                 755 }
764                                                   756 
765 unsigned long account_pipe_buffers(struct user !! 757 static unsigned long account_pipe_buffers(struct user_struct *user,
766                                    unsigned lo !! 758                                  unsigned long old, unsigned long new)
767 {                                                 759 {
768         return atomic_long_add_return(new - ol    760         return atomic_long_add_return(new - old, &user->pipe_bufs);
769 }                                                 761 }
770                                                   762 
771 bool too_many_pipe_buffers_soft(unsigned long  !! 763 static bool too_many_pipe_buffers_soft(unsigned long user_bufs)
772 {                                                 764 {
773         unsigned long soft_limit = READ_ONCE(p    765         unsigned long soft_limit = READ_ONCE(pipe_user_pages_soft);
774                                                   766 
775         return soft_limit && user_bufs > soft_    767         return soft_limit && user_bufs > soft_limit;
776 }                                                 768 }
777                                                   769 
778 bool too_many_pipe_buffers_hard(unsigned long  !! 770 static bool too_many_pipe_buffers_hard(unsigned long user_bufs)
779 {                                                 771 {
780         unsigned long hard_limit = READ_ONCE(p    772         unsigned long hard_limit = READ_ONCE(pipe_user_pages_hard);
781                                                   773 
782         return hard_limit && user_bufs > hard_    774         return hard_limit && user_bufs > hard_limit;
783 }                                                 775 }
784                                                   776 
785 bool pipe_is_unprivileged_user(void)           !! 777 static bool is_unprivileged_user(void)
786 {                                                 778 {
787         return !capable(CAP_SYS_RESOURCE) && !    779         return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
788 }                                                 780 }
789                                                   781 
790 struct pipe_inode_info *alloc_pipe_info(void)     782 struct pipe_inode_info *alloc_pipe_info(void)
791 {                                                 783 {
792         struct pipe_inode_info *pipe;             784         struct pipe_inode_info *pipe;
793         unsigned long pipe_bufs = PIPE_DEF_BUF    785         unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
794         struct user_struct *user = get_current    786         struct user_struct *user = get_current_user();
795         unsigned long user_bufs;                  787         unsigned long user_bufs;
796         unsigned int max_size = READ_ONCE(pipe    788         unsigned int max_size = READ_ONCE(pipe_max_size);
797                                                   789 
798         pipe = kzalloc(sizeof(struct pipe_inod    790         pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT);
799         if (pipe == NULL)                         791         if (pipe == NULL)
800                 goto out_free_uid;                792                 goto out_free_uid;
801                                                   793 
802         if (pipe_bufs * PAGE_SIZE > max_size &    794         if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE))
803                 pipe_bufs = max_size >> PAGE_S    795                 pipe_bufs = max_size >> PAGE_SHIFT;
804                                                   796 
805         user_bufs = account_pipe_buffers(user,    797         user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
806                                                   798 
807         if (too_many_pipe_buffers_soft(user_bu !! 799         if (too_many_pipe_buffers_soft(user_bufs) && is_unprivileged_user()) {
808                 user_bufs = account_pipe_buffe !! 800                 user_bufs = account_pipe_buffers(user, pipe_bufs, 1);
809                 pipe_bufs = PIPE_MIN_DEF_BUFFE !! 801                 pipe_bufs = 1;
810         }                                         802         }
811                                                   803 
812         if (too_many_pipe_buffers_hard(user_bu !! 804         if (too_many_pipe_buffers_hard(user_bufs) && is_unprivileged_user())
813                 goto out_revert_acct;             805                 goto out_revert_acct;
814                                                   806 
815         pipe->bufs = kcalloc(pipe_bufs, sizeof    807         pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer),
816                              GFP_KERNEL_ACCOUN    808                              GFP_KERNEL_ACCOUNT);
817                                                   809 
818         if (pipe->bufs) {                         810         if (pipe->bufs) {
819                 init_waitqueue_head(&pipe->rd_    811                 init_waitqueue_head(&pipe->rd_wait);
820                 init_waitqueue_head(&pipe->wr_    812                 init_waitqueue_head(&pipe->wr_wait);
821                 pipe->r_counter = pipe->w_coun    813                 pipe->r_counter = pipe->w_counter = 1;
822                 pipe->max_usage = pipe_bufs;      814                 pipe->max_usage = pipe_bufs;
823                 pipe->ring_size = pipe_bufs;      815                 pipe->ring_size = pipe_bufs;
824                 pipe->nr_accounted = pipe_bufs << 
825                 pipe->user = user;                816                 pipe->user = user;
826                 mutex_init(&pipe->mutex);         817                 mutex_init(&pipe->mutex);
827                 lock_set_cmp_fn(&pipe->mutex,  << 
828                 return pipe;                      818                 return pipe;
829         }                                         819         }
830                                                   820 
831 out_revert_acct:                                  821 out_revert_acct:
832         (void) account_pipe_buffers(user, pipe    822         (void) account_pipe_buffers(user, pipe_bufs, 0);
833         kfree(pipe);                              823         kfree(pipe);
834 out_free_uid:                                     824 out_free_uid:
835         free_uid(user);                           825         free_uid(user);
836         return NULL;                              826         return NULL;
837 }                                                 827 }
838                                                   828 
839 void free_pipe_info(struct pipe_inode_info *pi    829 void free_pipe_info(struct pipe_inode_info *pipe)
840 {                                                 830 {
841         unsigned int i;                        !! 831         int i;
842                                                << 
843 #ifdef CONFIG_WATCH_QUEUE                      << 
844         if (pipe->watch_queue)                 << 
845                 watch_queue_clear(pipe->watch_ << 
846 #endif                                         << 
847                                                   832 
848         (void) account_pipe_buffers(pipe->user !! 833         (void) account_pipe_buffers(pipe->user, pipe->ring_size, 0);
849         free_uid(pipe->user);                     834         free_uid(pipe->user);
850         for (i = 0; i < pipe->ring_size; i++)     835         for (i = 0; i < pipe->ring_size; i++) {
851                 struct pipe_buffer *buf = pipe    836                 struct pipe_buffer *buf = pipe->bufs + i;
852                 if (buf->ops)                     837                 if (buf->ops)
853                         pipe_buf_release(pipe,    838                         pipe_buf_release(pipe, buf);
854         }                                         839         }
855 #ifdef CONFIG_WATCH_QUEUE                      << 
856         if (pipe->watch_queue)                 << 
857                 put_watch_queue(pipe->watch_qu << 
858 #endif                                         << 
859         if (pipe->tmp_page)                       840         if (pipe->tmp_page)
860                 __free_page(pipe->tmp_page);      841                 __free_page(pipe->tmp_page);
861         kfree(pipe->bufs);                        842         kfree(pipe->bufs);
862         kfree(pipe);                              843         kfree(pipe);
863 }                                                 844 }
864                                                   845 
865 static struct vfsmount *pipe_mnt __ro_after_in !! 846 static struct vfsmount *pipe_mnt __read_mostly;
866                                                   847 
867 /*                                                848 /*
868  * pipefs_dname() is called from d_path().        849  * pipefs_dname() is called from d_path().
869  */                                               850  */
870 static char *pipefs_dname(struct dentry *dentr    851 static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
871 {                                                 852 {
872         return dynamic_dname(buffer, buflen, " !! 853         return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
873                                 d_inode(dentry    854                                 d_inode(dentry)->i_ino);
874 }                                                 855 }
875                                                   856 
876 static const struct dentry_operations pipefs_d    857 static const struct dentry_operations pipefs_dentry_operations = {
877         .d_dname        = pipefs_dname,           858         .d_dname        = pipefs_dname,
878 };                                                859 };
879                                                   860 
880 static struct inode * get_pipe_inode(void)        861 static struct inode * get_pipe_inode(void)
881 {                                                 862 {
882         struct inode *inode = new_inode_pseudo    863         struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
883         struct pipe_inode_info *pipe;             864         struct pipe_inode_info *pipe;
884                                                   865 
885         if (!inode)                               866         if (!inode)
886                 goto fail_inode;                  867                 goto fail_inode;
887                                                   868 
888         inode->i_ino = get_next_ino();            869         inode->i_ino = get_next_ino();
889                                                   870 
890         pipe = alloc_pipe_info();                 871         pipe = alloc_pipe_info();
891         if (!pipe)                                872         if (!pipe)
892                 goto fail_iput;                   873                 goto fail_iput;
893                                                   874 
894         inode->i_pipe = pipe;                     875         inode->i_pipe = pipe;
895         pipe->files = 2;                          876         pipe->files = 2;
896         pipe->readers = pipe->writers = 1;        877         pipe->readers = pipe->writers = 1;
897         inode->i_fop = &pipefifo_fops;            878         inode->i_fop = &pipefifo_fops;
898                                                   879 
899         /*                                        880         /*
900          * Mark the inode dirty from the very     881          * Mark the inode dirty from the very beginning,
901          * that way it will never be moved to     882          * that way it will never be moved to the dirty
902          * list because "mark_inode_dirty()" w    883          * list because "mark_inode_dirty()" will think
903          * that it already _is_ on the dirty l    884          * that it already _is_ on the dirty list.
904          */                                       885          */
905         inode->i_state = I_DIRTY;                 886         inode->i_state = I_DIRTY;
906         inode->i_mode = S_IFIFO | S_IRUSR | S_    887         inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
907         inode->i_uid = current_fsuid();           888         inode->i_uid = current_fsuid();
908         inode->i_gid = current_fsgid();           889         inode->i_gid = current_fsgid();
909         simple_inode_init_ts(inode);           !! 890         inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
910                                                   891 
911         return inode;                             892         return inode;
912                                                   893 
913 fail_iput:                                        894 fail_iput:
914         iput(inode);                              895         iput(inode);
915                                                   896 
916 fail_inode:                                       897 fail_inode:
917         return NULL;                              898         return NULL;
918 }                                                 899 }
919                                                   900 
920 int create_pipe_files(struct file **res, int f    901 int create_pipe_files(struct file **res, int flags)
921 {                                                 902 {
922         struct inode *inode = get_pipe_inode()    903         struct inode *inode = get_pipe_inode();
923         struct file *f;                           904         struct file *f;
924         int error;                             << 
925                                                   905 
926         if (!inode)                               906         if (!inode)
927                 return -ENFILE;                   907                 return -ENFILE;
928                                                   908 
929         if (flags & O_NOTIFICATION_PIPE) {     << 
930                 error = watch_queue_init(inode << 
931                 if (error) {                   << 
932                         free_pipe_info(inode-> << 
933                         iput(inode);           << 
934                         return error;          << 
935                 }                              << 
936         }                                      << 
937                                                << 
938         f = alloc_file_pseudo(inode, pipe_mnt,    909         f = alloc_file_pseudo(inode, pipe_mnt, "",
939                                 O_WRONLY | (fl    910                                 O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
940                                 &pipefifo_fops    911                                 &pipefifo_fops);
941         if (IS_ERR(f)) {                          912         if (IS_ERR(f)) {
942                 free_pipe_info(inode->i_pipe);    913                 free_pipe_info(inode->i_pipe);
943                 iput(inode);                      914                 iput(inode);
944                 return PTR_ERR(f);                915                 return PTR_ERR(f);
945         }                                         916         }
946                                                   917 
947         f->private_data = inode->i_pipe;          918         f->private_data = inode->i_pipe;
948         f->f_pipe = 0;                         << 
949                                                   919 
950         res[0] = alloc_file_clone(f, O_RDONLY     920         res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
951                                   &pipefifo_fo    921                                   &pipefifo_fops);
952         if (IS_ERR(res[0])) {                     922         if (IS_ERR(res[0])) {
953                 put_pipe_info(inode, inode->i_    923                 put_pipe_info(inode, inode->i_pipe);
954                 fput(f);                          924                 fput(f);
955                 return PTR_ERR(res[0]);           925                 return PTR_ERR(res[0]);
956         }                                         926         }
957         res[0]->private_data = inode->i_pipe;     927         res[0]->private_data = inode->i_pipe;
958         res[0]->f_pipe = 0;                    << 
959         res[1] = f;                               928         res[1] = f;
960         stream_open(inode, res[0]);               929         stream_open(inode, res[0]);
961         stream_open(inode, res[1]);               930         stream_open(inode, res[1]);
962         return 0;                                 931         return 0;
963 }                                                 932 }
964                                                   933 
965 static int __do_pipe_flags(int *fd, struct fil    934 static int __do_pipe_flags(int *fd, struct file **files, int flags)
966 {                                                 935 {
967         int error;                                936         int error;
968         int fdw, fdr;                             937         int fdw, fdr;
969                                                   938 
970         if (flags & ~(O_CLOEXEC | O_NONBLOCK | !! 939         if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT))
971                 return -EINVAL;                   940                 return -EINVAL;
972                                                   941 
973         error = create_pipe_files(files, flags    942         error = create_pipe_files(files, flags);
974         if (error)                                943         if (error)
975                 return error;                     944                 return error;
976                                                   945 
977         error = get_unused_fd_flags(flags);       946         error = get_unused_fd_flags(flags);
978         if (error < 0)                            947         if (error < 0)
979                 goto err_read_pipe;               948                 goto err_read_pipe;
980         fdr = error;                              949         fdr = error;
981                                                   950 
982         error = get_unused_fd_flags(flags);       951         error = get_unused_fd_flags(flags);
983         if (error < 0)                            952         if (error < 0)
984                 goto err_fdr;                     953                 goto err_fdr;
985         fdw = error;                              954         fdw = error;
986                                                   955 
987         audit_fd_pair(fdr, fdw);                  956         audit_fd_pair(fdr, fdw);
988         fd[0] = fdr;                              957         fd[0] = fdr;
989         fd[1] = fdw;                              958         fd[1] = fdw;
990         /* pipe groks IOCB_NOWAIT */           << 
991         files[0]->f_mode |= FMODE_NOWAIT;      << 
992         files[1]->f_mode |= FMODE_NOWAIT;      << 
993         return 0;                                 959         return 0;
994                                                   960 
995  err_fdr:                                         961  err_fdr:
996         put_unused_fd(fdr);                       962         put_unused_fd(fdr);
997  err_read_pipe:                                   963  err_read_pipe:
998         fput(files[0]);                           964         fput(files[0]);
999         fput(files[1]);                           965         fput(files[1]);
1000         return error;                            966         return error;
1001 }                                                967 }
1002                                                  968 
1003 int do_pipe_flags(int *fd, int flags)            969 int do_pipe_flags(int *fd, int flags)
1004 {                                                970 {
1005         struct file *files[2];                   971         struct file *files[2];
1006         int error = __do_pipe_flags(fd, files    972         int error = __do_pipe_flags(fd, files, flags);
1007         if (!error) {                            973         if (!error) {
1008                 fd_install(fd[0], files[0]);     974                 fd_install(fd[0], files[0]);
1009                 fd_install(fd[1], files[1]);     975                 fd_install(fd[1], files[1]);
1010         }                                        976         }
1011         return error;                            977         return error;
1012 }                                                978 }
1013                                                  979 
1014 /*                                               980 /*
1015  * sys_pipe() is the normal C calling standar    981  * sys_pipe() is the normal C calling standard for creating
1016  * a pipe. It's not the way Unix traditionall    982  * a pipe. It's not the way Unix traditionally does this, though.
1017  */                                              983  */
1018 static int do_pipe2(int __user *fildes, int f    984 static int do_pipe2(int __user *fildes, int flags)
1019 {                                                985 {
1020         struct file *files[2];                   986         struct file *files[2];
1021         int fd[2];                               987         int fd[2];
1022         int error;                               988         int error;
1023                                                  989 
1024         error = __do_pipe_flags(fd, files, fl    990         error = __do_pipe_flags(fd, files, flags);
1025         if (!error) {                            991         if (!error) {
1026                 if (unlikely(copy_to_user(fil    992                 if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
1027                         fput(files[0]);          993                         fput(files[0]);
1028                         fput(files[1]);          994                         fput(files[1]);
1029                         put_unused_fd(fd[0]);    995                         put_unused_fd(fd[0]);
1030                         put_unused_fd(fd[1]);    996                         put_unused_fd(fd[1]);
1031                         error = -EFAULT;         997                         error = -EFAULT;
1032                 } else {                         998                 } else {
1033                         fd_install(fd[0], fil    999                         fd_install(fd[0], files[0]);
1034                         fd_install(fd[1], fil    1000                         fd_install(fd[1], files[1]);
1035                 }                                1001                 }
1036         }                                        1002         }
1037         return error;                            1003         return error;
1038 }                                                1004 }
1039                                                  1005 
1040 SYSCALL_DEFINE2(pipe2, int __user *, fildes,     1006 SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
1041 {                                                1007 {
1042         return do_pipe2(fildes, flags);          1008         return do_pipe2(fildes, flags);
1043 }                                                1009 }
1044                                                  1010 
1045 SYSCALL_DEFINE1(pipe, int __user *, fildes)      1011 SYSCALL_DEFINE1(pipe, int __user *, fildes)
1046 {                                                1012 {
1047         return do_pipe2(fildes, 0);              1013         return do_pipe2(fildes, 0);
1048 }                                                1014 }
1049                                                  1015 
1050 /*                                            << 
1051  * This is the stupid "wait for pipe to be re << 
1052  * model.                                     << 
1053  *                                            << 
1054  * See pipe_read/write() for the proper kind  << 
1055  * but that requires that we wake up any othe << 
1056  * if we then do not end up reading everythin << 
1057  * "wake_next_reader/writer" logic in pipe_re << 
1058  */                                           << 
1059 void pipe_wait_readable(struct pipe_inode_inf << 
1060 {                                             << 
1061         pipe_unlock(pipe);                    << 
1062         wait_event_interruptible(pipe->rd_wai << 
1063         pipe_lock(pipe);                      << 
1064 }                                             << 
1065                                               << 
1066 void pipe_wait_writable(struct pipe_inode_inf << 
1067 {                                             << 
1068         pipe_unlock(pipe);                    << 
1069         wait_event_interruptible(pipe->wr_wai << 
1070         pipe_lock(pipe);                      << 
1071 }                                             << 
1072                                               << 
1073 /*                                            << 
1074  * This depends on both the wait (here) and t << 
1075  * holding the pipe lock, so "*cnt" is stable << 
1076  * race with the count check and waitqueue pr << 
1077  *                                            << 
1078  * Normally in order to avoid races, you'd do << 
1079  * then check the condition you're waiting fo << 
1080  * because of the pipe lock, we can check the << 
1081  * the wait queue.                            << 
1082  *                                            << 
1083  * We use the 'rd_wait' waitqueue for pipe pa << 
1084  */                                           << 
1085 static int wait_for_partner(struct pipe_inode    1016 static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
1086 {                                                1017 {
1087         DEFINE_WAIT(rdwait);                  << 
1088         int cur = *cnt;                          1018         int cur = *cnt;
1089                                                  1019 
1090         while (cur == *cnt) {                    1020         while (cur == *cnt) {
1091                 prepare_to_wait(&pipe->rd_wai !! 1021                 pipe_wait(pipe);
1092                 pipe_unlock(pipe);            << 
1093                 schedule();                   << 
1094                 finish_wait(&pipe->rd_wait, & << 
1095                 pipe_lock(pipe);              << 
1096                 if (signal_pending(current))     1022                 if (signal_pending(current))
1097                         break;                   1023                         break;
1098         }                                        1024         }
1099         return cur == *cnt ? -ERESTARTSYS : 0    1025         return cur == *cnt ? -ERESTARTSYS : 0;
1100 }                                                1026 }
1101                                                  1027 
1102 static void wake_up_partner(struct pipe_inode    1028 static void wake_up_partner(struct pipe_inode_info *pipe)
1103 {                                                1029 {
1104         wake_up_interruptible_all(&pipe->rd_w    1030         wake_up_interruptible_all(&pipe->rd_wait);
                                                   >> 1031         wake_up_interruptible_all(&pipe->wr_wait);
1105 }                                                1032 }
1106                                                  1033 
1107 static int fifo_open(struct inode *inode, str    1034 static int fifo_open(struct inode *inode, struct file *filp)
1108 {                                                1035 {
1109         struct pipe_inode_info *pipe;            1036         struct pipe_inode_info *pipe;
1110         bool is_pipe = inode->i_sb->s_magic =    1037         bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
1111         int ret;                                 1038         int ret;
1112                                                  1039 
1113         filp->f_pipe = 0;                     !! 1040         filp->f_version = 0;
1114                                                  1041 
1115         spin_lock(&inode->i_lock);               1042         spin_lock(&inode->i_lock);
1116         if (inode->i_pipe) {                     1043         if (inode->i_pipe) {
1117                 pipe = inode->i_pipe;            1044                 pipe = inode->i_pipe;
1118                 pipe->files++;                   1045                 pipe->files++;
1119                 spin_unlock(&inode->i_lock);     1046                 spin_unlock(&inode->i_lock);
1120         } else {                                 1047         } else {
1121                 spin_unlock(&inode->i_lock);     1048                 spin_unlock(&inode->i_lock);
1122                 pipe = alloc_pipe_info();        1049                 pipe = alloc_pipe_info();
1123                 if (!pipe)                       1050                 if (!pipe)
1124                         return -ENOMEM;          1051                         return -ENOMEM;
1125                 pipe->files = 1;                 1052                 pipe->files = 1;
1126                 spin_lock(&inode->i_lock);       1053                 spin_lock(&inode->i_lock);
1127                 if (unlikely(inode->i_pipe))     1054                 if (unlikely(inode->i_pipe)) {
1128                         inode->i_pipe->files+    1055                         inode->i_pipe->files++;
1129                         spin_unlock(&inode->i    1056                         spin_unlock(&inode->i_lock);
1130                         free_pipe_info(pipe);    1057                         free_pipe_info(pipe);
1131                         pipe = inode->i_pipe;    1058                         pipe = inode->i_pipe;
1132                 } else {                         1059                 } else {
1133                         inode->i_pipe = pipe;    1060                         inode->i_pipe = pipe;
1134                         spin_unlock(&inode->i    1061                         spin_unlock(&inode->i_lock);
1135                 }                                1062                 }
1136         }                                        1063         }
1137         filp->private_data = pipe;               1064         filp->private_data = pipe;
1138         /* OK, we have a pipe and it's pinned    1065         /* OK, we have a pipe and it's pinned down */
1139                                                  1066 
1140         mutex_lock(&pipe->mutex);             !! 1067         __pipe_lock(pipe);
1141                                                  1068 
1142         /* We can only do regular read/write     1069         /* We can only do regular read/write on fifos */
1143         stream_open(inode, filp);                1070         stream_open(inode, filp);
1144                                                  1071 
1145         switch (filp->f_mode & (FMODE_READ |     1072         switch (filp->f_mode & (FMODE_READ | FMODE_WRITE)) {
1146         case FMODE_READ:                         1073         case FMODE_READ:
1147         /*                                       1074         /*
1148          *  O_RDONLY                             1075          *  O_RDONLY
1149          *  POSIX.1 says that O_NONBLOCK mean    1076          *  POSIX.1 says that O_NONBLOCK means return with the FIFO
1150          *  opened, even when there is no pro    1077          *  opened, even when there is no process writing the FIFO.
1151          */                                      1078          */
1152                 pipe->r_counter++;               1079                 pipe->r_counter++;
1153                 if (pipe->readers++ == 0)        1080                 if (pipe->readers++ == 0)
1154                         wake_up_partner(pipe)    1081                         wake_up_partner(pipe);
1155                                                  1082 
1156                 if (!is_pipe && !pipe->writer    1083                 if (!is_pipe && !pipe->writers) {
1157                         if ((filp->f_flags &     1084                         if ((filp->f_flags & O_NONBLOCK)) {
1158                                 /* suppress E    1085                                 /* suppress EPOLLHUP until we have
1159                                  * seen a wri    1086                                  * seen a writer */
1160                                 filp->f_pipe  !! 1087                                 filp->f_version = pipe->w_counter;
1161                         } else {                 1088                         } else {
1162                                 if (wait_for_    1089                                 if (wait_for_partner(pipe, &pipe->w_counter))
1163                                         goto     1090                                         goto err_rd;
1164                         }                        1091                         }
1165                 }                                1092                 }
1166                 break;                           1093                 break;
1167                                                  1094 
1168         case FMODE_WRITE:                        1095         case FMODE_WRITE:
1169         /*                                       1096         /*
1170          *  O_WRONLY                             1097          *  O_WRONLY
1171          *  POSIX.1 says that O_NONBLOCK mean    1098          *  POSIX.1 says that O_NONBLOCK means return -1 with
1172          *  errno=ENXIO when there is no proc    1099          *  errno=ENXIO when there is no process reading the FIFO.
1173          */                                      1100          */
1174                 ret = -ENXIO;                    1101                 ret = -ENXIO;
1175                 if (!is_pipe && (filp->f_flag    1102                 if (!is_pipe && (filp->f_flags & O_NONBLOCK) && !pipe->readers)
1176                         goto err;                1103                         goto err;
1177                                                  1104 
1178                 pipe->w_counter++;               1105                 pipe->w_counter++;
1179                 if (!pipe->writers++)            1106                 if (!pipe->writers++)
1180                         wake_up_partner(pipe)    1107                         wake_up_partner(pipe);
1181                                                  1108 
1182                 if (!is_pipe && !pipe->reader    1109                 if (!is_pipe && !pipe->readers) {
1183                         if (wait_for_partner(    1110                         if (wait_for_partner(pipe, &pipe->r_counter))
1184                                 goto err_wr;     1111                                 goto err_wr;
1185                 }                                1112                 }
1186                 break;                           1113                 break;
1187                                                  1114 
1188         case FMODE_READ | FMODE_WRITE:           1115         case FMODE_READ | FMODE_WRITE:
1189         /*                                       1116         /*
1190          *  O_RDWR                               1117          *  O_RDWR
1191          *  POSIX.1 leaves this case "undefin    1118          *  POSIX.1 leaves this case "undefined" when O_NONBLOCK is set.
1192          *  This implementation will NEVER bl    1119          *  This implementation will NEVER block on a O_RDWR open, since
1193          *  the process can at least talk to     1120          *  the process can at least talk to itself.
1194          */                                      1121          */
1195                                                  1122 
1196                 pipe->readers++;                 1123                 pipe->readers++;
1197                 pipe->writers++;                 1124                 pipe->writers++;
1198                 pipe->r_counter++;               1125                 pipe->r_counter++;
1199                 pipe->w_counter++;               1126                 pipe->w_counter++;
1200                 if (pipe->readers == 1 || pip    1127                 if (pipe->readers == 1 || pipe->writers == 1)
1201                         wake_up_partner(pipe)    1128                         wake_up_partner(pipe);
1202                 break;                           1129                 break;
1203                                                  1130 
1204         default:                                 1131         default:
1205                 ret = -EINVAL;                   1132                 ret = -EINVAL;
1206                 goto err;                        1133                 goto err;
1207         }                                        1134         }
1208                                                  1135 
1209         /* Ok! */                                1136         /* Ok! */
1210         mutex_unlock(&pipe->mutex);           !! 1137         __pipe_unlock(pipe);
1211         return 0;                                1138         return 0;
1212                                                  1139 
1213 err_rd:                                          1140 err_rd:
1214         if (!--pipe->readers)                    1141         if (!--pipe->readers)
1215                 wake_up_interruptible(&pipe->    1142                 wake_up_interruptible(&pipe->wr_wait);
1216         ret = -ERESTARTSYS;                      1143         ret = -ERESTARTSYS;
1217         goto err;                                1144         goto err;
1218                                                  1145 
1219 err_wr:                                          1146 err_wr:
1220         if (!--pipe->writers)                    1147         if (!--pipe->writers)
1221                 wake_up_interruptible_all(&pi    1148                 wake_up_interruptible_all(&pipe->rd_wait);
1222         ret = -ERESTARTSYS;                      1149         ret = -ERESTARTSYS;
1223         goto err;                                1150         goto err;
1224                                                  1151 
1225 err:                                             1152 err:
1226         mutex_unlock(&pipe->mutex);           !! 1153         __pipe_unlock(pipe);
1227                                                  1154 
1228         put_pipe_info(inode, pipe);              1155         put_pipe_info(inode, pipe);
1229         return ret;                              1156         return ret;
1230 }                                                1157 }
1231                                                  1158 
1232 const struct file_operations pipefifo_fops =     1159 const struct file_operations pipefifo_fops = {
1233         .open           = fifo_open,             1160         .open           = fifo_open,
                                                   >> 1161         .llseek         = no_llseek,
1234         .read_iter      = pipe_read,             1162         .read_iter      = pipe_read,
1235         .write_iter     = pipe_write,            1163         .write_iter     = pipe_write,
1236         .poll           = pipe_poll,             1164         .poll           = pipe_poll,
1237         .unlocked_ioctl = pipe_ioctl,            1165         .unlocked_ioctl = pipe_ioctl,
1238         .release        = pipe_release,          1166         .release        = pipe_release,
1239         .fasync         = pipe_fasync,           1167         .fasync         = pipe_fasync,
1240         .splice_write   = iter_file_splice_wr << 
1241 };                                               1168 };
1242                                                  1169 
1243 /*                                               1170 /*
1244  * Currently we rely on the pipe array holdin    1171  * Currently we rely on the pipe array holding a power-of-2 number
1245  * of pages. Returns 0 on error.                 1172  * of pages. Returns 0 on error.
1246  */                                              1173  */
1247 unsigned int round_pipe_size(unsigned int siz !! 1174 unsigned int round_pipe_size(unsigned long size)
1248 {                                                1175 {
1249         if (size > (1U << 31))                   1176         if (size > (1U << 31))
1250                 return 0;                        1177                 return 0;
1251                                                  1178 
1252         /* Minimum pipe size, as required by     1179         /* Minimum pipe size, as required by POSIX */
1253         if (size < PAGE_SIZE)                    1180         if (size < PAGE_SIZE)
1254                 return PAGE_SIZE;                1181                 return PAGE_SIZE;
1255                                                  1182 
1256         return roundup_pow_of_two(size);         1183         return roundup_pow_of_two(size);
1257 }                                                1184 }
1258                                                  1185 
1259 /*                                               1186 /*
1260  * Resize the pipe ring to a number of slots. !! 1187  * Allocate a new array of pipe buffers and copy the info over. Returns the
1261  *                                            !! 1188  * pipe size if successful, or return -ERROR on error.
1262  * Note the pipe can be reduced in capacity,  << 
1263  * occupancy doesn't exceed nr_slots; if it d << 
1264  * returned instead.                          << 
1265  */                                              1189  */
1266 int pipe_resize_ring(struct pipe_inode_info * !! 1190 static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
1267 {                                                1191 {
1268         struct pipe_buffer *bufs;                1192         struct pipe_buffer *bufs;
1269         unsigned int head, tail, mask, n;     !! 1193         unsigned int size, nr_slots, head, tail, mask, n;
                                                   >> 1194         unsigned long user_bufs;
                                                   >> 1195         long ret = 0;
1270                                                  1196 
1271         bufs = kcalloc(nr_slots, sizeof(*bufs !! 1197         size = round_pipe_size(arg);
1272                        GFP_KERNEL_ACCOUNT | _ !! 1198         nr_slots = size >> PAGE_SHIFT;
1273         if (unlikely(!bufs))                  !! 1199 
1274                 return -ENOMEM;               !! 1200         if (!nr_slots)
                                                   >> 1201                 return -EINVAL;
                                                   >> 1202 
                                                   >> 1203         /*
                                                   >> 1204          * If trying to increase the pipe capacity, check that an
                                                   >> 1205          * unprivileged user is not trying to exceed various limits
                                                   >> 1206          * (soft limit check here, hard limit check just below).
                                                   >> 1207          * Decreasing the pipe capacity is always permitted, even
                                                   >> 1208          * if the user is currently over a limit.
                                                   >> 1209          */
                                                   >> 1210         if (nr_slots > pipe->ring_size &&
                                                   >> 1211                         size > pipe_max_size && !capable(CAP_SYS_RESOURCE))
                                                   >> 1212                 return -EPERM;
1275                                                  1213 
1276         spin_lock_irq(&pipe->rd_wait.lock);   !! 1214         user_bufs = account_pipe_buffers(pipe->user, pipe->ring_size, nr_slots);
                                                   >> 1215 
                                                   >> 1216         if (nr_slots > pipe->ring_size &&
                                                   >> 1217                         (too_many_pipe_buffers_hard(user_bufs) ||
                                                   >> 1218                          too_many_pipe_buffers_soft(user_bufs)) &&
                                                   >> 1219                         is_unprivileged_user()) {
                                                   >> 1220                 ret = -EPERM;
                                                   >> 1221                 goto out_revert_acct;
                                                   >> 1222         }
                                                   >> 1223 
                                                   >> 1224         /*
                                                   >> 1225          * We can shrink the pipe, if arg is greater than the ring occupancy.
                                                   >> 1226          * Since we don't expect a lot of shrink+grow operations, just free and
                                                   >> 1227          * allocate again like we would do for growing.  If the pipe currently
                                                   >> 1228          * contains more buffers than arg, then return busy.
                                                   >> 1229          */
1277         mask = pipe->ring_size - 1;              1230         mask = pipe->ring_size - 1;
1278         head = pipe->head;                       1231         head = pipe->head;
1279         tail = pipe->tail;                       1232         tail = pipe->tail;
1280                                               !! 1233         n = pipe_occupancy(pipe->head, pipe->tail);
1281         n = pipe_occupancy(head, tail);       << 
1282         if (nr_slots < n) {                      1234         if (nr_slots < n) {
1283                 spin_unlock_irq(&pipe->rd_wai !! 1235                 ret = -EBUSY;
1284                 kfree(bufs);                  !! 1236                 goto out_revert_acct;
1285                 return -EBUSY;                !! 1237         }
                                                   >> 1238 
                                                   >> 1239         bufs = kcalloc(nr_slots, sizeof(*bufs),
                                                   >> 1240                        GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
                                                   >> 1241         if (unlikely(!bufs)) {
                                                   >> 1242                 ret = -ENOMEM;
                                                   >> 1243                 goto out_revert_acct;
1286         }                                        1244         }
1287                                                  1245 
1288         /*                                       1246         /*
1289          * The pipe array wraps around, so ju    1247          * The pipe array wraps around, so just start the new one at zero
1290          * and adjust the indices.               1248          * and adjust the indices.
1291          */                                      1249          */
1292         if (n > 0) {                             1250         if (n > 0) {
1293                 unsigned int h = head & mask;    1251                 unsigned int h = head & mask;
1294                 unsigned int t = tail & mask;    1252                 unsigned int t = tail & mask;
1295                 if (h > t) {                     1253                 if (h > t) {
1296                         memcpy(bufs, pipe->bu    1254                         memcpy(bufs, pipe->bufs + t,
1297                                n * sizeof(str    1255                                n * sizeof(struct pipe_buffer));
1298                 } else {                         1256                 } else {
1299                         unsigned int tsize =     1257                         unsigned int tsize = pipe->ring_size - t;
1300                         if (h > 0)               1258                         if (h > 0)
1301                                 memcpy(bufs +    1259                                 memcpy(bufs + tsize, pipe->bufs,
1302                                        h * si    1260                                        h * sizeof(struct pipe_buffer));
1303                         memcpy(bufs, pipe->bu    1261                         memcpy(bufs, pipe->bufs + t,
1304                                tsize * sizeof    1262                                tsize * sizeof(struct pipe_buffer));
1305                 }                                1263                 }
1306         }                                        1264         }
1307                                                  1265 
1308         head = n;                                1266         head = n;
1309         tail = 0;                                1267         tail = 0;
1310                                                  1268 
1311         kfree(pipe->bufs);                       1269         kfree(pipe->bufs);
1312         pipe->bufs = bufs;                       1270         pipe->bufs = bufs;
1313         pipe->ring_size = nr_slots;              1271         pipe->ring_size = nr_slots;
1314         if (pipe->max_usage > nr_slots)       !! 1272         pipe->max_usage = nr_slots;
1315                 pipe->max_usage = nr_slots;   << 
1316         pipe->tail = tail;                       1273         pipe->tail = tail;
1317         pipe->head = head;                       1274         pipe->head = head;
1318                                                  1275 
1319         if (!pipe_has_watch_queue(pipe)) {    << 
1320                 pipe->max_usage = nr_slots;   << 
1321                 pipe->nr_accounted = nr_slots << 
1322         }                                     << 
1323                                               << 
1324         spin_unlock_irq(&pipe->rd_wait.lock); << 
1325                                               << 
1326         /* This might have made more room for    1276         /* This might have made more room for writers */
1327         wake_up_interruptible(&pipe->wr_wait)    1277         wake_up_interruptible(&pipe->wr_wait);
1328         return 0;                             << 
1329 }                                             << 
1330                                               << 
1331 /*                                            << 
1332  * Allocate a new array of pipe buffers and c << 
1333  * pipe size if successful, or return -ERROR  << 
1334  */                                           << 
1335 static long pipe_set_size(struct pipe_inode_i << 
1336 {                                             << 
1337         unsigned long user_bufs;              << 
1338         unsigned int nr_slots, size;          << 
1339         long ret = 0;                         << 
1340                                               << 
1341         if (pipe_has_watch_queue(pipe))       << 
1342                 return -EBUSY;                << 
1343                                               << 
1344         size = round_pipe_size(arg);          << 
1345         nr_slots = size >> PAGE_SHIFT;        << 
1346                                               << 
1347         if (!nr_slots)                        << 
1348                 return -EINVAL;               << 
1349                                               << 
1350         /*                                    << 
1351          * If trying to increase the pipe cap << 
1352          * unprivileged user is not trying to << 
1353          * (soft limit check here, hard limit << 
1354          * Decreasing the pipe capacity is al << 
1355          * if the user is currently over a li << 
1356          */                                   << 
1357         if (nr_slots > pipe->max_usage &&     << 
1358                         size > pipe_max_size  << 
1359                 return -EPERM;                << 
1360                                               << 
1361         user_bufs = account_pipe_buffers(pipe << 
1362                                               << 
1363         if (nr_slots > pipe->max_usage &&     << 
1364                         (too_many_pipe_buffer << 
1365                          too_many_pipe_buffer << 
1366                         pipe_is_unprivileged_ << 
1367                 ret = -EPERM;                 << 
1368                 goto out_revert_acct;         << 
1369         }                                     << 
1370                                               << 
1371         ret = pipe_resize_ring(pipe, nr_slots << 
1372         if (ret < 0)                          << 
1373                 goto out_revert_acct;         << 
1374                                               << 
1375         return pipe->max_usage * PAGE_SIZE;      1278         return pipe->max_usage * PAGE_SIZE;
1376                                                  1279 
1377 out_revert_acct:                                 1280 out_revert_acct:
1378         (void) account_pipe_buffers(pipe->use !! 1281         (void) account_pipe_buffers(pipe->user, nr_slots, pipe->ring_size);
1379         return ret;                              1282         return ret;
1380 }                                                1283 }
1381                                                  1284 
1382 /*                                               1285 /*
1383  * Note that i_pipe and i_cdev share the same !! 1286  * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
1384  * not enough to verify that this is a pipe.  !! 1287  * location, so checking ->i_pipe is not enough to verify that this is a
                                                   >> 1288  * pipe.
1385  */                                              1289  */
1386 struct pipe_inode_info *get_pipe_info(struct  !! 1290 struct pipe_inode_info *get_pipe_info(struct file *file)
1387 {                                                1291 {
1388         struct pipe_inode_info *pipe = file-> !! 1292         return file->f_op == &pipefifo_fops ? file->private_data : NULL;
1389                                               << 
1390         if (file->f_op != &pipefifo_fops || ! << 
1391                 return NULL;                  << 
1392         if (for_splice && pipe_has_watch_queu << 
1393                 return NULL;                  << 
1394         return pipe;                          << 
1395 }                                                1293 }
1396                                                  1294 
1397 long pipe_fcntl(struct file *file, unsigned i !! 1295 long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
1398 {                                                1296 {
1399         struct pipe_inode_info *pipe;            1297         struct pipe_inode_info *pipe;
1400         long ret;                                1298         long ret;
1401                                                  1299 
1402         pipe = get_pipe_info(file, false);    !! 1300         pipe = get_pipe_info(file);
1403         if (!pipe)                               1301         if (!pipe)
1404                 return -EBADF;                   1302                 return -EBADF;
1405                                                  1303 
1406         mutex_lock(&pipe->mutex);             !! 1304         __pipe_lock(pipe);
1407                                                  1305 
1408         switch (cmd) {                           1306         switch (cmd) {
1409         case F_SETPIPE_SZ:                       1307         case F_SETPIPE_SZ:
1410                 ret = pipe_set_size(pipe, arg    1308                 ret = pipe_set_size(pipe, arg);
1411                 break;                           1309                 break;
1412         case F_GETPIPE_SZ:                       1310         case F_GETPIPE_SZ:
1413                 ret = pipe->max_usage * PAGE_    1311                 ret = pipe->max_usage * PAGE_SIZE;
1414                 break;                           1312                 break;
1415         default:                                 1313         default:
1416                 ret = -EINVAL;                   1314                 ret = -EINVAL;
1417                 break;                           1315                 break;
1418         }                                        1316         }
1419                                                  1317 
1420         mutex_unlock(&pipe->mutex);           !! 1318         __pipe_unlock(pipe);
1421         return ret;                              1319         return ret;
1422 }                                                1320 }
1423                                                  1321 
1424 static const struct super_operations pipefs_o    1322 static const struct super_operations pipefs_ops = {
1425         .destroy_inode = free_inode_nonrcu,      1323         .destroy_inode = free_inode_nonrcu,
1426         .statfs = simple_statfs,                 1324         .statfs = simple_statfs,
1427 };                                               1325 };
1428                                                  1326 
1429 /*                                               1327 /*
1430  * pipefs should _never_ be mounted by userla    1328  * pipefs should _never_ be mounted by userland - too much of security hassle,
1431  * no real gain from having the whole file sy !! 1329  * no real gain from having the whole whorehouse mounted. So we don't need
1432  * any operations on the root directory. Howe    1330  * any operations on the root directory. However, we need a non-trivial
1433  * d_name - pipe: will go nicely and kill the    1331  * d_name - pipe: will go nicely and kill the special-casing in procfs.
1434  */                                              1332  */
1435                                                  1333 
1436 static int pipefs_init_fs_context(struct fs_c    1334 static int pipefs_init_fs_context(struct fs_context *fc)
1437 {                                                1335 {
1438         struct pseudo_fs_context *ctx = init_    1336         struct pseudo_fs_context *ctx = init_pseudo(fc, PIPEFS_MAGIC);
1439         if (!ctx)                                1337         if (!ctx)
1440                 return -ENOMEM;                  1338                 return -ENOMEM;
1441         ctx->ops = &pipefs_ops;                  1339         ctx->ops = &pipefs_ops;
1442         ctx->dops = &pipefs_dentry_operations    1340         ctx->dops = &pipefs_dentry_operations;
1443         return 0;                                1341         return 0;
1444 }                                                1342 }
1445                                                  1343 
1446 static struct file_system_type pipe_fs_type =    1344 static struct file_system_type pipe_fs_type = {
1447         .name           = "pipefs",              1345         .name           = "pipefs",
1448         .init_fs_context = pipefs_init_fs_con    1346         .init_fs_context = pipefs_init_fs_context,
1449         .kill_sb        = kill_anon_super,       1347         .kill_sb        = kill_anon_super,
1450 };                                               1348 };
1451                                                  1349 
1452 #ifdef CONFIG_SYSCTL                          << 
1453 static int do_proc_dopipe_max_size_conv(unsig << 
1454                                         unsig << 
1455                                         int w << 
1456 {                                             << 
1457         if (write) {                          << 
1458                 unsigned int val;             << 
1459                                               << 
1460                 val = round_pipe_size(*lvalp) << 
1461                 if (val == 0)                 << 
1462                         return -EINVAL;       << 
1463                                               << 
1464                 *valp = val;                  << 
1465         } else {                              << 
1466                 unsigned int val = *valp;     << 
1467                 *lvalp = (unsigned long) val; << 
1468         }                                     << 
1469                                               << 
1470         return 0;                             << 
1471 }                                             << 
1472                                               << 
1473 static int proc_dopipe_max_size(const struct  << 
1474                                 void *buffer, << 
1475 {                                             << 
1476         return do_proc_douintvec(table, write << 
1477                                  do_proc_dopi << 
1478 }                                             << 
1479                                               << 
1480 static struct ctl_table fs_pipe_sysctls[] = { << 
1481         {                                     << 
1482                 .procname       = "pipe-max-s << 
1483                 .data           = &pipe_max_s << 
1484                 .maxlen         = sizeof(pipe << 
1485                 .mode           = 0644,       << 
1486                 .proc_handler   = proc_dopipe << 
1487         },                                    << 
1488         {                                     << 
1489                 .procname       = "pipe-user- << 
1490                 .data           = &pipe_user_ << 
1491                 .maxlen         = sizeof(pipe << 
1492                 .mode           = 0644,       << 
1493                 .proc_handler   = proc_doulon << 
1494         },                                    << 
1495         {                                     << 
1496                 .procname       = "pipe-user- << 
1497                 .data           = &pipe_user_ << 
1498                 .maxlen         = sizeof(pipe << 
1499                 .mode           = 0644,       << 
1500                 .proc_handler   = proc_doulon << 
1501         },                                    << 
1502 };                                            << 
1503 #endif                                        << 
1504                                               << 
1505 static int __init init_pipe_fs(void)             1350 static int __init init_pipe_fs(void)
1506 {                                                1351 {
1507         int err = register_filesystem(&pipe_f    1352         int err = register_filesystem(&pipe_fs_type);
1508                                                  1353 
1509         if (!err) {                              1354         if (!err) {
1510                 pipe_mnt = kern_mount(&pipe_f    1355                 pipe_mnt = kern_mount(&pipe_fs_type);
1511                 if (IS_ERR(pipe_mnt)) {          1356                 if (IS_ERR(pipe_mnt)) {
1512                         err = PTR_ERR(pipe_mn    1357                         err = PTR_ERR(pipe_mnt);
1513                         unregister_filesystem    1358                         unregister_filesystem(&pipe_fs_type);
1514                 }                                1359                 }
1515         }                                        1360         }
1516 #ifdef CONFIG_SYSCTL                          << 
1517         register_sysctl_init("fs", fs_pipe_sy << 
1518 #endif                                        << 
1519         return err;                              1361         return err;
1520 }                                                1362 }
1521                                                  1363 
1522 fs_initcall(init_pipe_fs);                       1364 fs_initcall(init_pipe_fs);
1523                                                  1365 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php