~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/mm/memfd.c

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /mm/memfd.c (Version linux-6.12-rc7) and /mm/memfd.c (Version linux-6.3.13)


  1 /*                                                  1 /*
  2  * memfd_create system call and file sealing s      2  * memfd_create system call and file sealing support
  3  *                                                  3  *
  4  * Code was originally included in shmem.c, an      4  * Code was originally included in shmem.c, and broken out to facilitate
  5  * use by hugetlbfs as well as tmpfs.               5  * use by hugetlbfs as well as tmpfs.
  6  *                                                  6  *
  7  * This file is released under the GPL.             7  * This file is released under the GPL.
  8  */                                                 8  */
  9                                                     9 
 10 #include <linux/fs.h>                              10 #include <linux/fs.h>
 11 #include <linux/vfs.h>                             11 #include <linux/vfs.h>
 12 #include <linux/pagemap.h>                         12 #include <linux/pagemap.h>
 13 #include <linux/file.h>                            13 #include <linux/file.h>
 14 #include <linux/mm.h>                              14 #include <linux/mm.h>
 15 #include <linux/sched/signal.h>                    15 #include <linux/sched/signal.h>
 16 #include <linux/khugepaged.h>                      16 #include <linux/khugepaged.h>
 17 #include <linux/syscalls.h>                        17 #include <linux/syscalls.h>
 18 #include <linux/hugetlb.h>                         18 #include <linux/hugetlb.h>
 19 #include <linux/shmem_fs.h>                        19 #include <linux/shmem_fs.h>
 20 #include <linux/memfd.h>                           20 #include <linux/memfd.h>
 21 #include <linux/pid_namespace.h>                   21 #include <linux/pid_namespace.h>
 22 #include <uapi/linux/memfd.h>                      22 #include <uapi/linux/memfd.h>
 23                                                    23 
 24 /*                                                 24 /*
 25  * We need a tag: a new tag would expand every     25  * We need a tag: a new tag would expand every xa_node by 8 bytes,
 26  * so reuse a tag which we firmly believe is n     26  * so reuse a tag which we firmly believe is never set or cleared on tmpfs
 27  * or hugetlbfs because they are memory only f     27  * or hugetlbfs because they are memory only filesystems.
 28  */                                                28  */
 29 #define MEMFD_TAG_PINNED        PAGECACHE_TAG_     29 #define MEMFD_TAG_PINNED        PAGECACHE_TAG_TOWRITE
 30 #define LAST_SCAN               4       /* abo     30 #define LAST_SCAN               4       /* about 150ms max */
 31                                                    31 
 32 static bool memfd_folio_has_extra_refs(struct  << 
 33 {                                              << 
 34         return folio_ref_count(folio) - folio_ << 
 35                folio_nr_pages(folio);          << 
 36 }                                              << 
 37                                                << 
 38 static void memfd_tag_pins(struct xa_state *xa     32 static void memfd_tag_pins(struct xa_state *xas)
 39 {                                                  33 {
 40         struct folio *folio;                   !!  34         struct page *page;
 41         int latency = 0;                           35         int latency = 0;
                                                   >>  36         int cache_count;
 42                                                    37 
 43         lru_add_drain();                           38         lru_add_drain();
 44                                                    39 
 45         xas_lock_irq(xas);                         40         xas_lock_irq(xas);
 46         xas_for_each(xas, folio, ULONG_MAX) {  !!  41         xas_for_each(xas, page, ULONG_MAX) {
 47                 if (!xa_is_value(folio) && mem !!  42                 cache_count = 1;
                                                   >>  43                 if (!xa_is_value(page) &&
                                                   >>  44                     PageTransHuge(page) && !PageHuge(page))
                                                   >>  45                         cache_count = HPAGE_PMD_NR;
                                                   >>  46 
                                                   >>  47                 if (!xa_is_value(page) &&
                                                   >>  48                     page_count(page) - total_mapcount(page) != cache_count)
 48                         xas_set_mark(xas, MEMF     49                         xas_set_mark(xas, MEMFD_TAG_PINNED);
                                                   >>  50                 if (cache_count != 1)
                                                   >>  51                         xas_set(xas, page->index + cache_count);
 49                                                    52 
 50                 if (++latency < XA_CHECK_SCHED !!  53                 latency += cache_count;
                                                   >>  54                 if (latency < XA_CHECK_SCHED)
 51                         continue;                  55                         continue;
 52                 latency = 0;                       56                 latency = 0;
 53                                                    57 
 54                 xas_pause(xas);                    58                 xas_pause(xas);
 55                 xas_unlock_irq(xas);               59                 xas_unlock_irq(xas);
 56                 cond_resched();                    60                 cond_resched();
 57                 xas_lock_irq(xas);                 61                 xas_lock_irq(xas);
 58         }                                          62         }
 59         xas_unlock_irq(xas);                       63         xas_unlock_irq(xas);
 60 }                                                  64 }
 61                                                    65 
 62 /*                                                 66 /*
 63  * This is a helper function used by memfd_pin << 
 64  * It is mainly called to allocate a folio in  << 
 65  * (memfd_pin_folios()) cannot find a folio in << 
 66  * index in the mapping.                       << 
 67  */                                            << 
 68 struct folio *memfd_alloc_folio(struct file *m << 
 69 {                                              << 
 70 #ifdef CONFIG_HUGETLB_PAGE                     << 
 71         struct folio *folio;                   << 
 72         gfp_t gfp_mask;                        << 
 73         int err;                               << 
 74                                                << 
 75         if (is_file_hugepages(memfd)) {        << 
 76                 /*                             << 
 77                  * The folio would most likely << 
 78                  * therefore, we have zone mem << 
 79                  * alloc from. Also, the folio << 
 80                  * amount of time, so it is no << 
 81                  */                            << 
 82                 struct hstate *h = hstate_file << 
 83                                                << 
 84                 gfp_mask = htlb_alloc_mask(h); << 
 85                 gfp_mask &= ~(__GFP_HIGHMEM |  << 
 86                 idx >>= huge_page_order(h);    << 
 87                                                << 
 88                 folio = alloc_hugetlb_folio_re << 
 89                                                << 
 90                                                << 
 91                                                << 
 92                 if (folio) {                   << 
 93                         err = hugetlb_add_to_p << 
 94                                                << 
 95                                                << 
 96                         if (err) {             << 
 97                                 folio_put(foli << 
 98                                 return ERR_PTR << 
 99                         }                      << 
100                         folio_unlock(folio);   << 
101                         return folio;          << 
102                 }                              << 
103                 return ERR_PTR(-ENOMEM);       << 
104         }                                      << 
105 #endif                                         << 
106         return shmem_read_folio(memfd->f_mappi << 
107 }                                              << 
108                                                << 
109 /*                                             << 
110  * Setting SEAL_WRITE requires us to verify th     67  * Setting SEAL_WRITE requires us to verify there's no pending writer. However,
111  * via get_user_pages(), drivers might have so     68  * via get_user_pages(), drivers might have some pending I/O without any active
112  * user-space mappings (e.g., direct-IO, AIO). !!  69  * user-space mappings (e.g., direct-IO, AIO). Therefore, we look at all pages
113  * and see whether it has an elevated ref-coun     70  * and see whether it has an elevated ref-count. If so, we tag them and wait for
114  * them to be dropped.                             71  * them to be dropped.
115  * The caller must guarantee that no new user      72  * The caller must guarantee that no new user will acquire writable references
116  * to those folios to avoid races.             !!  73  * to those pages to avoid races.
117  */                                                74  */
118 static int memfd_wait_for_pins(struct address_     75 static int memfd_wait_for_pins(struct address_space *mapping)
119 {                                                  76 {
120         XA_STATE(xas, &mapping->i_pages, 0);       77         XA_STATE(xas, &mapping->i_pages, 0);
121         struct folio *folio;                   !!  78         struct page *page;
122         int error, scan;                           79         int error, scan;
123                                                    80 
124         memfd_tag_pins(&xas);                      81         memfd_tag_pins(&xas);
125                                                    82 
126         error = 0;                                 83         error = 0;
127         for (scan = 0; scan <= LAST_SCAN; scan     84         for (scan = 0; scan <= LAST_SCAN; scan++) {
128                 int latency = 0;                   85                 int latency = 0;
                                                   >>  86                 int cache_count;
129                                                    87 
130                 if (!xas_marked(&xas, MEMFD_TA     88                 if (!xas_marked(&xas, MEMFD_TAG_PINNED))
131                         break;                     89                         break;
132                                                    90 
133                 if (!scan)                         91                 if (!scan)
134                         lru_add_drain_all();       92                         lru_add_drain_all();
135                 else if (schedule_timeout_kill     93                 else if (schedule_timeout_killable((HZ << scan) / 200))
136                         scan = LAST_SCAN;          94                         scan = LAST_SCAN;
137                                                    95 
138                 xas_set(&xas, 0);                  96                 xas_set(&xas, 0);
139                 xas_lock_irq(&xas);                97                 xas_lock_irq(&xas);
140                 xas_for_each_marked(&xas, foli !!  98                 xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) {
141                         bool clear = true;         99                         bool clear = true;
142                                                   100 
143                         if (!xa_is_value(folio !! 101                         cache_count = 1;
144                             memfd_folio_has_ex !! 102                         if (!xa_is_value(page) &&
                                                   >> 103                             PageTransHuge(page) && !PageHuge(page))
                                                   >> 104                                 cache_count = HPAGE_PMD_NR;
                                                   >> 105 
                                                   >> 106                         if (!xa_is_value(page) && cache_count !=
                                                   >> 107                             page_count(page) - total_mapcount(page)) {
145                                 /*                108                                 /*
146                                  * On the last    109                                  * On the last scan, we clean up all those tags
147                                  * we inserted    110                                  * we inserted; but make a note that we still
148                                  * found folio !! 111                                  * found pages pinned.
149                                  */               112                                  */
150                                 if (scan == LA    113                                 if (scan == LAST_SCAN)
151                                         error     114                                         error = -EBUSY;
152                                 else              115                                 else
153                                         clear     116                                         clear = false;
154                         }                         117                         }
155                         if (clear)                118                         if (clear)
156                                 xas_clear_mark    119                                 xas_clear_mark(&xas, MEMFD_TAG_PINNED);
157                                                   120 
158                         if (++latency < XA_CHE !! 121                         latency += cache_count;
                                                   >> 122                         if (latency < XA_CHECK_SCHED)
159                                 continue;         123                                 continue;
160                         latency = 0;              124                         latency = 0;
161                                                   125 
162                         xas_pause(&xas);          126                         xas_pause(&xas);
163                         xas_unlock_irq(&xas);     127                         xas_unlock_irq(&xas);
164                         cond_resched();           128                         cond_resched();
165                         xas_lock_irq(&xas);       129                         xas_lock_irq(&xas);
166                 }                                 130                 }
167                 xas_unlock_irq(&xas);             131                 xas_unlock_irq(&xas);
168         }                                         132         }
169                                                   133 
170         return error;                             134         return error;
171 }                                                 135 }
172                                                   136 
173 static unsigned int *memfd_file_seals_ptr(stru    137 static unsigned int *memfd_file_seals_ptr(struct file *file)
174 {                                                 138 {
175         if (shmem_file(file))                     139         if (shmem_file(file))
176                 return &SHMEM_I(file_inode(fil    140                 return &SHMEM_I(file_inode(file))->seals;
177                                                   141 
178 #ifdef CONFIG_HUGETLBFS                           142 #ifdef CONFIG_HUGETLBFS
179         if (is_file_hugepages(file))              143         if (is_file_hugepages(file))
180                 return &HUGETLBFS_I(file_inode    144                 return &HUGETLBFS_I(file_inode(file))->seals;
181 #endif                                            145 #endif
182                                                   146 
183         return NULL;                              147         return NULL;
184 }                                                 148 }
185                                                   149 
186 #define F_ALL_SEALS (F_SEAL_SEAL | \              150 #define F_ALL_SEALS (F_SEAL_SEAL | \
187                      F_SEAL_EXEC | \              151                      F_SEAL_EXEC | \
188                      F_SEAL_SHRINK | \            152                      F_SEAL_SHRINK | \
189                      F_SEAL_GROW | \              153                      F_SEAL_GROW | \
190                      F_SEAL_WRITE | \             154                      F_SEAL_WRITE | \
191                      F_SEAL_FUTURE_WRITE)         155                      F_SEAL_FUTURE_WRITE)
192                                                   156 
193 static int memfd_add_seals(struct file *file,     157 static int memfd_add_seals(struct file *file, unsigned int seals)
194 {                                                 158 {
195         struct inode *inode = file_inode(file)    159         struct inode *inode = file_inode(file);
196         unsigned int *file_seals;                 160         unsigned int *file_seals;
197         int error;                                161         int error;
198                                                   162 
199         /*                                        163         /*
200          * SEALING                                164          * SEALING
201          * Sealing allows multiple parties to     165          * Sealing allows multiple parties to share a tmpfs or hugetlbfs file
202          * but restrict access to a specific s    166          * but restrict access to a specific subset of file operations. Seals
203          * can only be added, but never remove    167          * can only be added, but never removed. This way, mutually untrusted
204          * parties can share common memory reg    168          * parties can share common memory regions with a well-defined policy.
205          * A malicious peer can thus never per    169          * A malicious peer can thus never perform unwanted operations on a
206          * shared object.                         170          * shared object.
207          *                                        171          *
208          * Seals are only supported on special    172          * Seals are only supported on special tmpfs or hugetlbfs files and
209          * always affect the whole underlying     173          * always affect the whole underlying inode. Once a seal is set, it
210          * may prevent some kinds of access to    174          * may prevent some kinds of access to the file. Currently, the
211          * following seals are defined:           175          * following seals are defined:
212          *   SEAL_SEAL: Prevent further seals     176          *   SEAL_SEAL: Prevent further seals from being set on this file
213          *   SEAL_SHRINK: Prevent the file fro    177          *   SEAL_SHRINK: Prevent the file from shrinking
214          *   SEAL_GROW: Prevent the file from     178          *   SEAL_GROW: Prevent the file from growing
215          *   SEAL_WRITE: Prevent write access     179          *   SEAL_WRITE: Prevent write access to the file
216          *   SEAL_EXEC: Prevent modification o    180          *   SEAL_EXEC: Prevent modification of the exec bits in the file mode
217          *                                        181          *
218          * As we don't require any trust relat    182          * As we don't require any trust relationship between two parties, we
219          * must prevent seals from being remov    183          * must prevent seals from being removed. Therefore, sealing a file
220          * only adds a given set of seals to t    184          * only adds a given set of seals to the file, it never touches
221          * existing seals. Furthermore, the "s    185          * existing seals. Furthermore, the "setting seals"-operation can be
222          * sealed itself, which basically prev    186          * sealed itself, which basically prevents any further seal from being
223          * added.                                 187          * added.
224          *                                        188          *
225          * Semantics of sealing are only defin    189          * Semantics of sealing are only defined on volatile files. Only
226          * anonymous tmpfs and hugetlbfs files    190          * anonymous tmpfs and hugetlbfs files support sealing. More
227          * importantly, seals are never writte    191          * importantly, seals are never written to disk. Therefore, there's
228          * no plan to support it on other file    192          * no plan to support it on other file types.
229          */                                       193          */
230                                                   194 
231         if (!(file->f_mode & FMODE_WRITE))        195         if (!(file->f_mode & FMODE_WRITE))
232                 return -EPERM;                    196                 return -EPERM;
233         if (seals & ~(unsigned int)F_ALL_SEALS    197         if (seals & ~(unsigned int)F_ALL_SEALS)
234                 return -EINVAL;                   198                 return -EINVAL;
235                                                   199 
236         inode_lock(inode);                        200         inode_lock(inode);
237                                                   201 
238         file_seals = memfd_file_seals_ptr(file    202         file_seals = memfd_file_seals_ptr(file);
239         if (!file_seals) {                        203         if (!file_seals) {
240                 error = -EINVAL;                  204                 error = -EINVAL;
241                 goto unlock;                      205                 goto unlock;
242         }                                         206         }
243                                                   207 
244         if (*file_seals & F_SEAL_SEAL) {          208         if (*file_seals & F_SEAL_SEAL) {
245                 error = -EPERM;                   209                 error = -EPERM;
246                 goto unlock;                      210                 goto unlock;
247         }                                         211         }
248                                                   212 
249         if ((seals & F_SEAL_WRITE) && !(*file_    213         if ((seals & F_SEAL_WRITE) && !(*file_seals & F_SEAL_WRITE)) {
250                 error = mapping_deny_writable(    214                 error = mapping_deny_writable(file->f_mapping);
251                 if (error)                        215                 if (error)
252                         goto unlock;              216                         goto unlock;
253                                                   217 
254                 error = memfd_wait_for_pins(fi    218                 error = memfd_wait_for_pins(file->f_mapping);
255                 if (error) {                      219                 if (error) {
256                         mapping_allow_writable    220                         mapping_allow_writable(file->f_mapping);
257                         goto unlock;              221                         goto unlock;
258                 }                                 222                 }
259         }                                         223         }
260                                                   224 
261         /*                                        225         /*
262          * SEAL_EXEC implies SEAL_WRITE, makin    226          * SEAL_EXEC implies SEAL_WRITE, making W^X from the start.
263          */                                       227          */
264         if (seals & F_SEAL_EXEC && inode->i_mo    228         if (seals & F_SEAL_EXEC && inode->i_mode & 0111)
265                 seals |= F_SEAL_SHRINK|F_SEAL_    229                 seals |= F_SEAL_SHRINK|F_SEAL_GROW|F_SEAL_WRITE|F_SEAL_FUTURE_WRITE;
266                                                   230 
267         *file_seals |= seals;                     231         *file_seals |= seals;
268         error = 0;                                232         error = 0;
269                                                   233 
270 unlock:                                           234 unlock:
271         inode_unlock(inode);                      235         inode_unlock(inode);
272         return error;                             236         return error;
273 }                                                 237 }
274                                                   238 
275 static int memfd_get_seals(struct file *file)     239 static int memfd_get_seals(struct file *file)
276 {                                                 240 {
277         unsigned int *seals = memfd_file_seals    241         unsigned int *seals = memfd_file_seals_ptr(file);
278                                                   242 
279         return seals ? *seals : -EINVAL;          243         return seals ? *seals : -EINVAL;
280 }                                                 244 }
281                                                   245 
282 long memfd_fcntl(struct file *file, unsigned i !! 246 long memfd_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
283 {                                                 247 {
284         long error;                               248         long error;
285                                                   249 
286         switch (cmd) {                            250         switch (cmd) {
287         case F_ADD_SEALS:                         251         case F_ADD_SEALS:
                                                   >> 252                 /* disallow upper 32bit */
                                                   >> 253                 if (arg > UINT_MAX)
                                                   >> 254                         return -EINVAL;
                                                   >> 255 
288                 error = memfd_add_seals(file,     256                 error = memfd_add_seals(file, arg);
289                 break;                            257                 break;
290         case F_GET_SEALS:                         258         case F_GET_SEALS:
291                 error = memfd_get_seals(file);    259                 error = memfd_get_seals(file);
292                 break;                            260                 break;
293         default:                                  261         default:
294                 error = -EINVAL;                  262                 error = -EINVAL;
295                 break;                            263                 break;
296         }                                         264         }
297                                                   265 
298         return error;                             266         return error;
299 }                                                 267 }
300                                                   268 
301 #define MFD_NAME_PREFIX "memfd:"                  269 #define MFD_NAME_PREFIX "memfd:"
302 #define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_P    270 #define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
303 #define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_    271 #define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)
304                                                   272 
305 #define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW    273 #define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB | MFD_NOEXEC_SEAL | MFD_EXEC)
306                                                   274 
307 static int check_sysctl_memfd_noexec(unsigned  << 
308 {                                              << 
309 #ifdef CONFIG_SYSCTL                           << 
310         struct pid_namespace *ns = task_active << 
311         int sysctl = pidns_memfd_noexec_scope( << 
312                                                << 
313         if (!(*flags & (MFD_EXEC | MFD_NOEXEC_ << 
314                 if (sysctl >= MEMFD_NOEXEC_SCO << 
315                         *flags |= MFD_NOEXEC_S << 
316                 else                           << 
317                         *flags |= MFD_EXEC;    << 
318         }                                      << 
319                                                << 
320         if (!(*flags & MFD_NOEXEC_SEAL) && sys << 
321                 pr_err_ratelimited(            << 
322                         "%s[%d]: memfd_create( << 
323                         current->comm, task_pi << 
324                 return -EACCES;                << 
325         }                                      << 
326 #endif                                         << 
327         return 0;                              << 
328 }                                              << 
329                                                << 
330 SYSCALL_DEFINE2(memfd_create,                     275 SYSCALL_DEFINE2(memfd_create,
331                 const char __user *, uname,       276                 const char __user *, uname,
332                 unsigned int, flags)              277                 unsigned int, flags)
333 {                                                 278 {
                                                   >> 279         char comm[TASK_COMM_LEN];
334         unsigned int *file_seals;                 280         unsigned int *file_seals;
335         struct file *file;                        281         struct file *file;
336         int fd, error;                            282         int fd, error;
337         char *name;                               283         char *name;
338         long len;                                 284         long len;
339                                                   285 
340         if (!(flags & MFD_HUGETLB)) {             286         if (!(flags & MFD_HUGETLB)) {
341                 if (flags & ~(unsigned int)MFD    287                 if (flags & ~(unsigned int)MFD_ALL_FLAGS)
342                         return -EINVAL;           288                         return -EINVAL;
343         } else {                                  289         } else {
344                 /* Allow huge page size encodi    290                 /* Allow huge page size encoding in flags. */
345                 if (flags & ~(unsigned int)(MF    291                 if (flags & ~(unsigned int)(MFD_ALL_FLAGS |
346                                 (MFD_HUGE_MASK    292                                 (MFD_HUGE_MASK << MFD_HUGE_SHIFT)))
347                         return -EINVAL;           293                         return -EINVAL;
348         }                                         294         }
349                                                   295 
350         /* Invalid if both EXEC and NOEXEC_SEA    296         /* Invalid if both EXEC and NOEXEC_SEAL are set.*/
351         if ((flags & MFD_EXEC) && (flags & MFD    297         if ((flags & MFD_EXEC) && (flags & MFD_NOEXEC_SEAL))
352                 return -EINVAL;                   298                 return -EINVAL;
353                                                   299 
354         error = check_sysctl_memfd_noexec(&fla !! 300         if (!(flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) {
355         if (error < 0)                         !! 301 #ifdef CONFIG_SYSCTL
356                 return error;                  !! 302                 int sysctl = MEMFD_NOEXEC_SCOPE_EXEC;
                                                   >> 303                 struct pid_namespace *ns;
                                                   >> 304 
                                                   >> 305                 ns = task_active_pid_ns(current);
                                                   >> 306                 if (ns)
                                                   >> 307                         sysctl = ns->memfd_noexec_scope;
                                                   >> 308 
                                                   >> 309                 switch (sysctl) {
                                                   >> 310                 case MEMFD_NOEXEC_SCOPE_EXEC:
                                                   >> 311                         flags |= MFD_EXEC;
                                                   >> 312                         break;
                                                   >> 313                 case MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL:
                                                   >> 314                         flags |= MFD_NOEXEC_SEAL;
                                                   >> 315                         break;
                                                   >> 316                 default:
                                                   >> 317                         pr_warn_once(
                                                   >> 318                                 "memfd_create(): MFD_NOEXEC_SEAL is enforced, pid=%d '%s'\n",
                                                   >> 319                                 task_pid_nr(current), get_task_comm(comm, current));
                                                   >> 320                         return -EINVAL;
                                                   >> 321                 }
                                                   >> 322 #else
                                                   >> 323                 flags |= MFD_EXEC;
                                                   >> 324 #endif
                                                   >> 325                 pr_warn_once(
                                                   >> 326                         "memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL, pid=%d '%s'\n",
                                                   >> 327                         task_pid_nr(current), get_task_comm(comm, current));
                                                   >> 328         }
357                                                   329 
358         /* length includes terminating zero */    330         /* length includes terminating zero */
359         len = strnlen_user(uname, MFD_NAME_MAX    331         len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
360         if (len <= 0)                             332         if (len <= 0)
361                 return -EFAULT;                   333                 return -EFAULT;
362         if (len > MFD_NAME_MAX_LEN + 1)           334         if (len > MFD_NAME_MAX_LEN + 1)
363                 return -EINVAL;                   335                 return -EINVAL;
364                                                   336 
365         name = kmalloc(len + MFD_NAME_PREFIX_L    337         name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_KERNEL);
366         if (!name)                                338         if (!name)
367                 return -ENOMEM;                   339                 return -ENOMEM;
368                                                   340 
369         strcpy(name, MFD_NAME_PREFIX);            341         strcpy(name, MFD_NAME_PREFIX);
370         if (copy_from_user(&name[MFD_NAME_PREF    342         if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) {
371                 error = -EFAULT;                  343                 error = -EFAULT;
372                 goto err_name;                    344                 goto err_name;
373         }                                         345         }
374                                                   346 
375         /* terminating-zero may have changed a    347         /* terminating-zero may have changed after strnlen_user() returned */
376         if (name[len + MFD_NAME_PREFIX_LEN - 1    348         if (name[len + MFD_NAME_PREFIX_LEN - 1]) {
377                 error = -EFAULT;                  349                 error = -EFAULT;
378                 goto err_name;                    350                 goto err_name;
379         }                                         351         }
380                                                   352 
381         fd = get_unused_fd_flags((flags & MFD_    353         fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0);
382         if (fd < 0) {                             354         if (fd < 0) {
383                 error = fd;                       355                 error = fd;
384                 goto err_name;                    356                 goto err_name;
385         }                                         357         }
386                                                   358 
387         if (flags & MFD_HUGETLB) {                359         if (flags & MFD_HUGETLB) {
388                 file = hugetlb_file_setup(name    360                 file = hugetlb_file_setup(name, 0, VM_NORESERVE,
389                                         HUGETL    361                                         HUGETLB_ANONHUGE_INODE,
390                                         (flags    362                                         (flags >> MFD_HUGE_SHIFT) &
391                                         MFD_HU    363                                         MFD_HUGE_MASK);
392         } else                                    364         } else
393                 file = shmem_file_setup(name,     365                 file = shmem_file_setup(name, 0, VM_NORESERVE);
394         if (IS_ERR(file)) {                       366         if (IS_ERR(file)) {
395                 error = PTR_ERR(file);            367                 error = PTR_ERR(file);
396                 goto err_fd;                      368                 goto err_fd;
397         }                                         369         }
398         file->f_mode |= FMODE_LSEEK | FMODE_PR    370         file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
399         file->f_flags |= O_LARGEFILE;             371         file->f_flags |= O_LARGEFILE;
400                                                   372 
401         if (flags & MFD_NOEXEC_SEAL) {            373         if (flags & MFD_NOEXEC_SEAL) {
402                 struct inode *inode = file_ino    374                 struct inode *inode = file_inode(file);
403                                                   375 
404                 inode->i_mode &= ~0111;           376                 inode->i_mode &= ~0111;
405                 file_seals = memfd_file_seals_    377                 file_seals = memfd_file_seals_ptr(file);
406                 if (file_seals) {                 378                 if (file_seals) {
407                         *file_seals &= ~F_SEAL    379                         *file_seals &= ~F_SEAL_SEAL;
408                         *file_seals |= F_SEAL_    380                         *file_seals |= F_SEAL_EXEC;
409                 }                                 381                 }
410         } else if (flags & MFD_ALLOW_SEALING)     382         } else if (flags & MFD_ALLOW_SEALING) {
411                 /* MFD_EXEC and MFD_ALLOW_SEAL    383                 /* MFD_EXEC and MFD_ALLOW_SEALING are set */
412                 file_seals = memfd_file_seals_    384                 file_seals = memfd_file_seals_ptr(file);
413                 if (file_seals)                   385                 if (file_seals)
414                         *file_seals &= ~F_SEAL    386                         *file_seals &= ~F_SEAL_SEAL;
415         }                                         387         }
416                                                   388 
417         fd_install(fd, file);                     389         fd_install(fd, file);
418         kfree(name);                              390         kfree(name);
419         return fd;                                391         return fd;
420                                                   392 
421 err_fd:                                           393 err_fd:
422         put_unused_fd(fd);                        394         put_unused_fd(fd);
423 err_name:                                         395 err_name:
424         kfree(name);                              396         kfree(name);
425         return error;                             397         return error;
426 }                                                 398 }
427                                                   399 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php