~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/mm/memfd.c

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /mm/memfd.c (Version linux-6.12-rc7) and /mm/memfd.c (Version linux-6.5.13)


  1 /*                                                  1 /*
  2  * memfd_create system call and file sealing s      2  * memfd_create system call and file sealing support
  3  *                                                  3  *
  4  * Code was originally included in shmem.c, an      4  * Code was originally included in shmem.c, and broken out to facilitate
  5  * use by hugetlbfs as well as tmpfs.               5  * use by hugetlbfs as well as tmpfs.
  6  *                                                  6  *
  7  * This file is released under the GPL.             7  * This file is released under the GPL.
  8  */                                                 8  */
  9                                                     9 
 10 #include <linux/fs.h>                              10 #include <linux/fs.h>
 11 #include <linux/vfs.h>                             11 #include <linux/vfs.h>
 12 #include <linux/pagemap.h>                         12 #include <linux/pagemap.h>
 13 #include <linux/file.h>                            13 #include <linux/file.h>
 14 #include <linux/mm.h>                              14 #include <linux/mm.h>
 15 #include <linux/sched/signal.h>                    15 #include <linux/sched/signal.h>
 16 #include <linux/khugepaged.h>                      16 #include <linux/khugepaged.h>
 17 #include <linux/syscalls.h>                        17 #include <linux/syscalls.h>
 18 #include <linux/hugetlb.h>                         18 #include <linux/hugetlb.h>
 19 #include <linux/shmem_fs.h>                        19 #include <linux/shmem_fs.h>
 20 #include <linux/memfd.h>                           20 #include <linux/memfd.h>
 21 #include <linux/pid_namespace.h>                   21 #include <linux/pid_namespace.h>
 22 #include <uapi/linux/memfd.h>                      22 #include <uapi/linux/memfd.h>
 23                                                    23 
 24 /*                                                 24 /*
 25  * We need a tag: a new tag would expand every     25  * We need a tag: a new tag would expand every xa_node by 8 bytes,
 26  * so reuse a tag which we firmly believe is n     26  * so reuse a tag which we firmly believe is never set or cleared on tmpfs
 27  * or hugetlbfs because they are memory only f     27  * or hugetlbfs because they are memory only filesystems.
 28  */                                                28  */
 29 #define MEMFD_TAG_PINNED        PAGECACHE_TAG_     29 #define MEMFD_TAG_PINNED        PAGECACHE_TAG_TOWRITE
 30 #define LAST_SCAN               4       /* abo     30 #define LAST_SCAN               4       /* about 150ms max */
 31                                                    31 
 32 static bool memfd_folio_has_extra_refs(struct  << 
 33 {                                              << 
 34         return folio_ref_count(folio) - folio_ << 
 35                folio_nr_pages(folio);          << 
 36 }                                              << 
 37                                                << 
 38 static void memfd_tag_pins(struct xa_state *xa     32 static void memfd_tag_pins(struct xa_state *xas)
 39 {                                                  33 {
 40         struct folio *folio;                   !!  34         struct page *page;
 41         int latency = 0;                           35         int latency = 0;
                                                   >>  36         int cache_count;
 42                                                    37 
 43         lru_add_drain();                           38         lru_add_drain();
 44                                                    39 
 45         xas_lock_irq(xas);                         40         xas_lock_irq(xas);
 46         xas_for_each(xas, folio, ULONG_MAX) {  !!  41         xas_for_each(xas, page, ULONG_MAX) {
 47                 if (!xa_is_value(folio) && mem !!  42                 cache_count = 1;
                                                   >>  43                 if (!xa_is_value(page) &&
                                                   >>  44                     PageTransHuge(page) && !PageHuge(page))
                                                   >>  45                         cache_count = HPAGE_PMD_NR;
                                                   >>  46 
                                                   >>  47                 if (!xa_is_value(page) &&
                                                   >>  48                     page_count(page) - total_mapcount(page) != cache_count)
 48                         xas_set_mark(xas, MEMF     49                         xas_set_mark(xas, MEMFD_TAG_PINNED);
                                                   >>  50                 if (cache_count != 1)
                                                   >>  51                         xas_set(xas, page->index + cache_count);
 49                                                    52 
 50                 if (++latency < XA_CHECK_SCHED !!  53                 latency += cache_count;
                                                   >>  54                 if (latency < XA_CHECK_SCHED)
 51                         continue;                  55                         continue;
 52                 latency = 0;                       56                 latency = 0;
 53                                                    57 
 54                 xas_pause(xas);                    58                 xas_pause(xas);
 55                 xas_unlock_irq(xas);               59                 xas_unlock_irq(xas);
 56                 cond_resched();                    60                 cond_resched();
 57                 xas_lock_irq(xas);                 61                 xas_lock_irq(xas);
 58         }                                          62         }
 59         xas_unlock_irq(xas);                       63         xas_unlock_irq(xas);
 60 }                                                  64 }
 61                                                    65 
 62 /*                                                 66 /*
 63  * This is a helper function used by memfd_pin << 
 64  * It is mainly called to allocate a folio in  << 
 65  * (memfd_pin_folios()) cannot find a folio in << 
 66  * index in the mapping.                       << 
 67  */                                            << 
 68 struct folio *memfd_alloc_folio(struct file *m << 
 69 {                                              << 
 70 #ifdef CONFIG_HUGETLB_PAGE                     << 
 71         struct folio *folio;                   << 
 72         gfp_t gfp_mask;                        << 
 73         int err;                               << 
 74                                                << 
 75         if (is_file_hugepages(memfd)) {        << 
 76                 /*                             << 
 77                  * The folio would most likely << 
 78                  * therefore, we have zone mem << 
 79                  * alloc from. Also, the folio << 
 80                  * amount of time, so it is no << 
 81                  */                            << 
 82                 struct hstate *h = hstate_file << 
 83                                                << 
 84                 gfp_mask = htlb_alloc_mask(h); << 
 85                 gfp_mask &= ~(__GFP_HIGHMEM |  << 
 86                 idx >>= huge_page_order(h);    << 
 87                                                << 
 88                 folio = alloc_hugetlb_folio_re << 
 89                                                << 
 90                                                << 
 91                                                << 
 92                 if (folio) {                   << 
 93                         err = hugetlb_add_to_p << 
 94                                                << 
 95                                                << 
 96                         if (err) {             << 
 97                                 folio_put(foli << 
 98                                 return ERR_PTR << 
 99                         }                      << 
100                         folio_unlock(folio);   << 
101                         return folio;          << 
102                 }                              << 
103                 return ERR_PTR(-ENOMEM);       << 
104         }                                      << 
105 #endif                                         << 
106         return shmem_read_folio(memfd->f_mappi << 
107 }                                              << 
108                                                << 
109 /*                                             << 
110  * Setting SEAL_WRITE requires us to verify th     67  * Setting SEAL_WRITE requires us to verify there's no pending writer. However,
111  * via get_user_pages(), drivers might have so     68  * via get_user_pages(), drivers might have some pending I/O without any active
112  * user-space mappings (eg., direct-IO, AIO).  !!  69  * user-space mappings (eg., direct-IO, AIO). Therefore, we look at all pages
113  * and see whether any has an elevated ref-cou     70  * and see whether any has an elevated ref-count. If so, we tag them and wait for
114  * them to be dropped.                             71  * them to be dropped.
115  * The caller must guarantee that no new user      72  * The caller must guarantee that no new user will acquire writable references
116  * to those folios to avoid races.             !!  73  * to those pages to avoid races.
117  */                                                74  */
118 static int memfd_wait_for_pins(struct address_     75 static int memfd_wait_for_pins(struct address_space *mapping)
119 {                                                  76 {
120         XA_STATE(xas, &mapping->i_pages, 0);       77         XA_STATE(xas, &mapping->i_pages, 0);
121         struct folio *folio;                   !!  78         struct page *page;
122         int error, scan;                           79         int error, scan;
123                                                    80 
124         memfd_tag_pins(&xas);                      81         memfd_tag_pins(&xas);
125                                                    82 
126         error = 0;                                 83         error = 0;
127         for (scan = 0; scan <= LAST_SCAN; scan     84         for (scan = 0; scan <= LAST_SCAN; scan++) {
128                 int latency = 0;                   85                 int latency = 0;
                                                   >>  86                 int cache_count;
129                                                    87 
130                 if (!xas_marked(&xas, MEMFD_TA     88                 if (!xas_marked(&xas, MEMFD_TAG_PINNED))
131                         break;                     89                         break;
132                                                    90 
133                 if (!scan)                         91                 if (!scan)
134                         lru_add_drain_all();       92                         lru_add_drain_all();
135                 else if (schedule_timeout_kill     93                 else if (schedule_timeout_killable((HZ << scan) / 200))
136                         scan = LAST_SCAN;          94                         scan = LAST_SCAN;
137                                                    95 
138                 xas_set(&xas, 0);                  96                 xas_set(&xas, 0);
139                 xas_lock_irq(&xas);                97                 xas_lock_irq(&xas);
140                 xas_for_each_marked(&xas, foli !!  98                 xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) {
141                         bool clear = true;         99                         bool clear = true;
142                                                   100 
143                         if (!xa_is_value(folio !! 101                         cache_count = 1;
144                             memfd_folio_has_ex !! 102                         if (!xa_is_value(page) &&
                                                   >> 103                             PageTransHuge(page) && !PageHuge(page))
                                                   >> 104                                 cache_count = HPAGE_PMD_NR;
                                                   >> 105 
                                                   >> 106                         if (!xa_is_value(page) && cache_count !=
                                                   >> 107                             page_count(page) - total_mapcount(page)) {
145                                 /*                108                                 /*
146                                  * On the last    109                                  * On the last scan, we clean up all those tags
147                                  * we inserted    110                                  * we inserted; but make a note that we still
148                                  * found folio !! 111                                  * found pages pinned.
149                                  */               112                                  */
150                                 if (scan == LA    113                                 if (scan == LAST_SCAN)
151                                         error     114                                         error = -EBUSY;
152                                 else              115                                 else
153                                         clear     116                                         clear = false;
154                         }                         117                         }
155                         if (clear)                118                         if (clear)
156                                 xas_clear_mark    119                                 xas_clear_mark(&xas, MEMFD_TAG_PINNED);
157                                                   120 
158                         if (++latency < XA_CHE !! 121                         latency += cache_count;
                                                   >> 122                         if (latency < XA_CHECK_SCHED)
159                                 continue;         123                                 continue;
160                         latency = 0;              124                         latency = 0;
161                                                   125 
162                         xas_pause(&xas);          126                         xas_pause(&xas);
163                         xas_unlock_irq(&xas);     127                         xas_unlock_irq(&xas);
164                         cond_resched();           128                         cond_resched();
165                         xas_lock_irq(&xas);       129                         xas_lock_irq(&xas);
166                 }                                 130                 }
167                 xas_unlock_irq(&xas);             131                 xas_unlock_irq(&xas);
168         }                                         132         }
169                                                   133 
170         return error;                             134         return error;
171 }                                                 135 }
172                                                   136 
173 static unsigned int *memfd_file_seals_ptr(stru    137 static unsigned int *memfd_file_seals_ptr(struct file *file)
174 {                                                 138 {
175         if (shmem_file(file))                     139         if (shmem_file(file))
176                 return &SHMEM_I(file_inode(fil    140                 return &SHMEM_I(file_inode(file))->seals;
177                                                   141 
178 #ifdef CONFIG_HUGETLBFS                           142 #ifdef CONFIG_HUGETLBFS
179         if (is_file_hugepages(file))              143         if (is_file_hugepages(file))
180                 return &HUGETLBFS_I(file_inode    144                 return &HUGETLBFS_I(file_inode(file))->seals;
181 #endif                                            145 #endif
182                                                   146 
183         return NULL;                              147         return NULL;
184 }                                                 148 }
185                                                   149 
186 #define F_ALL_SEALS (F_SEAL_SEAL | \              150 #define F_ALL_SEALS (F_SEAL_SEAL | \
187                      F_SEAL_EXEC | \              151                      F_SEAL_EXEC | \
188                      F_SEAL_SHRINK | \            152                      F_SEAL_SHRINK | \
189                      F_SEAL_GROW | \              153                      F_SEAL_GROW | \
190                      F_SEAL_WRITE | \             154                      F_SEAL_WRITE | \
191                      F_SEAL_FUTURE_WRITE)         155                      F_SEAL_FUTURE_WRITE)
192                                                   156 
193 static int memfd_add_seals(struct file *file,     157 static int memfd_add_seals(struct file *file, unsigned int seals)
194 {                                                 158 {
195         struct inode *inode = file_inode(file)    159         struct inode *inode = file_inode(file);
196         unsigned int *file_seals;                 160         unsigned int *file_seals;
197         int error;                                161         int error;
198                                                   162 
199         /*                                        163         /*
200          * SEALING                                164          * SEALING
201          * Sealing allows multiple parties to     165          * Sealing allows multiple parties to share a tmpfs or hugetlbfs file
202          * but restrict access to a specific s    166          * but restrict access to a specific subset of file operations. Seals
203          * can only be added, but never remove    167          * can only be added, but never removed. This way, mutually untrusted
204          * parties can share common memory reg    168          * parties can share common memory regions with a well-defined policy.
205          * A malicious peer can thus never per    169          * A malicious peer can thus never perform unwanted operations on a
206          * shared object.                         170          * shared object.
207          *                                        171          *
208          * Seals are only supported on special    172          * Seals are only supported on special tmpfs or hugetlbfs files and
209          * always affect the whole underlying     173          * always affect the whole underlying inode. Once a seal is set, it
210          * may prevent some kinds of access to    174          * may prevent some kinds of access to the file. Currently, the
211          * following seals are defined:           175          * following seals are defined:
212          *   SEAL_SEAL: Prevent further seals     176          *   SEAL_SEAL: Prevent further seals from being set on this file
213          *   SEAL_SHRINK: Prevent the file fro    177          *   SEAL_SHRINK: Prevent the file from shrinking
214          *   SEAL_GROW: Prevent the file from     178          *   SEAL_GROW: Prevent the file from growing
215          *   SEAL_WRITE: Prevent write access     179          *   SEAL_WRITE: Prevent write access to the file
216          *   SEAL_EXEC: Prevent modification o    180          *   SEAL_EXEC: Prevent modification of the exec bits in the file mode
217          *                                        181          *
218          * As we don't require any trust relat    182          * As we don't require any trust relationship between two parties, we
219          * must prevent seals from being remov    183          * must prevent seals from being removed. Therefore, sealing a file
220          * only adds a given set of seals to t    184          * only adds a given set of seals to the file, it never touches
221          * existing seals. Furthermore, the "s    185          * existing seals. Furthermore, the "setting seals"-operation can be
222          * sealed itself, which basically prev    186          * sealed itself, which basically prevents any further seal from being
223          * added.                                 187          * added.
224          *                                        188          *
225          * Semantics of sealing are only defin    189          * Semantics of sealing are only defined on volatile files. Only
226          * anonymous tmpfs and hugetlbfs files    190          * anonymous tmpfs and hugetlbfs files support sealing. More
227          * importantly, seals are never writte    191          * importantly, seals are never written to disk. Therefore, there's
228          * no plan to support it on other file    192          * no plan to support it on other file types.
229          */                                       193          */
230                                                   194 
231         if (!(file->f_mode & FMODE_WRITE))        195         if (!(file->f_mode & FMODE_WRITE))
232                 return -EPERM;                    196                 return -EPERM;
233         if (seals & ~(unsigned int)F_ALL_SEALS    197         if (seals & ~(unsigned int)F_ALL_SEALS)
234                 return -EINVAL;                   198                 return -EINVAL;
235                                                   199 
236         inode_lock(inode);                        200         inode_lock(inode);
237                                                   201 
238         file_seals = memfd_file_seals_ptr(file    202         file_seals = memfd_file_seals_ptr(file);
239         if (!file_seals) {                        203         if (!file_seals) {
240                 error = -EINVAL;                  204                 error = -EINVAL;
241                 goto unlock;                      205                 goto unlock;
242         }                                         206         }
243                                                   207 
244         if (*file_seals & F_SEAL_SEAL) {          208         if (*file_seals & F_SEAL_SEAL) {
245                 error = -EPERM;                   209                 error = -EPERM;
246                 goto unlock;                      210                 goto unlock;
247         }                                         211         }
248                                                   212 
249         if ((seals & F_SEAL_WRITE) && !(*file_    213         if ((seals & F_SEAL_WRITE) && !(*file_seals & F_SEAL_WRITE)) {
250                 error = mapping_deny_writable(    214                 error = mapping_deny_writable(file->f_mapping);
251                 if (error)                        215                 if (error)
252                         goto unlock;              216                         goto unlock;
253                                                   217 
254                 error = memfd_wait_for_pins(fi    218                 error = memfd_wait_for_pins(file->f_mapping);
255                 if (error) {                      219                 if (error) {
256                         mapping_allow_writable    220                         mapping_allow_writable(file->f_mapping);
257                         goto unlock;              221                         goto unlock;
258                 }                                 222                 }
259         }                                         223         }
260                                                   224 
261         /*                                        225         /*
262          * SEAL_EXEC implies SEAL_WRITE, makin    226          * SEAL_EXEC implies SEAL_WRITE, making W^X from the start.
263          */                                       227          */
264         if (seals & F_SEAL_EXEC && inode->i_mo    228         if (seals & F_SEAL_EXEC && inode->i_mode & 0111)
265                 seals |= F_SEAL_SHRINK|F_SEAL_    229                 seals |= F_SEAL_SHRINK|F_SEAL_GROW|F_SEAL_WRITE|F_SEAL_FUTURE_WRITE;
266                                                   230 
267         *file_seals |= seals;                     231         *file_seals |= seals;
268         error = 0;                                232         error = 0;
269                                                   233 
270 unlock:                                           234 unlock:
271         inode_unlock(inode);                      235         inode_unlock(inode);
272         return error;                             236         return error;
273 }                                                 237 }
274                                                   238 
275 static int memfd_get_seals(struct file *file)     239 static int memfd_get_seals(struct file *file)
276 {                                                 240 {
277         unsigned int *seals = memfd_file_seals    241         unsigned int *seals = memfd_file_seals_ptr(file);
278                                                   242 
279         return seals ? *seals : -EINVAL;          243         return seals ? *seals : -EINVAL;
280 }                                                 244 }
281                                                   245 
282 long memfd_fcntl(struct file *file, unsigned i    246 long memfd_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
283 {                                                 247 {
284         long error;                               248         long error;
285                                                   249 
286         switch (cmd) {                            250         switch (cmd) {
287         case F_ADD_SEALS:                         251         case F_ADD_SEALS:
288                 error = memfd_add_seals(file,     252                 error = memfd_add_seals(file, arg);
289                 break;                            253                 break;
290         case F_GET_SEALS:                         254         case F_GET_SEALS:
291                 error = memfd_get_seals(file);    255                 error = memfd_get_seals(file);
292                 break;                            256                 break;
293         default:                                  257         default:
294                 error = -EINVAL;                  258                 error = -EINVAL;
295                 break;                            259                 break;
296         }                                         260         }
297                                                   261 
298         return error;                             262         return error;
299 }                                                 263 }
300                                                   264 
301 #define MFD_NAME_PREFIX "memfd:"                  265 #define MFD_NAME_PREFIX "memfd:"
302 #define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_P    266 #define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
303 #define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_    267 #define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)
304                                                   268 
305 #define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW    269 #define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB | MFD_NOEXEC_SEAL | MFD_EXEC)
306                                                   270 
307 static int check_sysctl_memfd_noexec(unsigned     271 static int check_sysctl_memfd_noexec(unsigned int *flags)
308 {                                                 272 {
309 #ifdef CONFIG_SYSCTL                              273 #ifdef CONFIG_SYSCTL
310         struct pid_namespace *ns = task_active    274         struct pid_namespace *ns = task_active_pid_ns(current);
311         int sysctl = pidns_memfd_noexec_scope(    275         int sysctl = pidns_memfd_noexec_scope(ns);
312                                                   276 
313         if (!(*flags & (MFD_EXEC | MFD_NOEXEC_    277         if (!(*flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) {
314                 if (sysctl >= MEMFD_NOEXEC_SCO    278                 if (sysctl >= MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL)
315                         *flags |= MFD_NOEXEC_S    279                         *flags |= MFD_NOEXEC_SEAL;
316                 else                              280                 else
317                         *flags |= MFD_EXEC;       281                         *flags |= MFD_EXEC;
318         }                                         282         }
319                                                   283 
320         if (!(*flags & MFD_NOEXEC_SEAL) && sys    284         if (!(*flags & MFD_NOEXEC_SEAL) && sysctl >= MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED) {
321                 pr_err_ratelimited(               285                 pr_err_ratelimited(
322                         "%s[%d]: memfd_create(    286                         "%s[%d]: memfd_create() requires MFD_NOEXEC_SEAL with vm.memfd_noexec=%d\n",
323                         current->comm, task_pi    287                         current->comm, task_pid_nr(current), sysctl);
324                 return -EACCES;                   288                 return -EACCES;
325         }                                         289         }
326 #endif                                            290 #endif
327         return 0;                                 291         return 0;
328 }                                                 292 }
329                                                   293 
330 SYSCALL_DEFINE2(memfd_create,                     294 SYSCALL_DEFINE2(memfd_create,
331                 const char __user *, uname,       295                 const char __user *, uname,
332                 unsigned int, flags)              296                 unsigned int, flags)
333 {                                                 297 {
334         unsigned int *file_seals;                 298         unsigned int *file_seals;
335         struct file *file;                        299         struct file *file;
336         int fd, error;                            300         int fd, error;
337         char *name;                               301         char *name;
338         long len;                                 302         long len;
339                                                   303 
340         if (!(flags & MFD_HUGETLB)) {             304         if (!(flags & MFD_HUGETLB)) {
341                 if (flags & ~(unsigned int)MFD    305                 if (flags & ~(unsigned int)MFD_ALL_FLAGS)
342                         return -EINVAL;           306                         return -EINVAL;
343         } else {                                  307         } else {
344                 /* Allow huge page size encodi    308                 /* Allow huge page size encoding in flags. */
345                 if (flags & ~(unsigned int)(MF    309                 if (flags & ~(unsigned int)(MFD_ALL_FLAGS |
346                                 (MFD_HUGE_MASK    310                                 (MFD_HUGE_MASK << MFD_HUGE_SHIFT)))
347                         return -EINVAL;           311                         return -EINVAL;
348         }                                         312         }
349                                                   313 
350         /* Invalid if both EXEC and NOEXEC_SEA    314         /* Invalid if both EXEC and NOEXEC_SEAL are set.*/
351         if ((flags & MFD_EXEC) && (flags & MFD    315         if ((flags & MFD_EXEC) && (flags & MFD_NOEXEC_SEAL))
352                 return -EINVAL;                   316                 return -EINVAL;
                                                   >> 317 
                                                   >> 318         if (!(flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) {
                                                   >> 319                 pr_warn_once(
                                                   >> 320                         "%s[%d]: memfd_create() called without MFD_EXEC or MFD_NOEXEC_SEAL set\n",
                                                   >> 321                         current->comm, task_pid_nr(current));
                                                   >> 322         }
353                                                   323 
354         error = check_sysctl_memfd_noexec(&fla    324         error = check_sysctl_memfd_noexec(&flags);
355         if (error < 0)                            325         if (error < 0)
356                 return error;                     326                 return error;
357                                                   327 
358         /* length includes terminating zero */    328         /* length includes terminating zero */
359         len = strnlen_user(uname, MFD_NAME_MAX    329         len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
360         if (len <= 0)                             330         if (len <= 0)
361                 return -EFAULT;                   331                 return -EFAULT;
362         if (len > MFD_NAME_MAX_LEN + 1)           332         if (len > MFD_NAME_MAX_LEN + 1)
363                 return -EINVAL;                   333                 return -EINVAL;
364                                                   334 
365         name = kmalloc(len + MFD_NAME_PREFIX_L    335         name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_KERNEL);
366         if (!name)                                336         if (!name)
367                 return -ENOMEM;                   337                 return -ENOMEM;
368                                                   338 
369         strcpy(name, MFD_NAME_PREFIX);            339         strcpy(name, MFD_NAME_PREFIX);
370         if (copy_from_user(&name[MFD_NAME_PREF    340         if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) {
371                 error = -EFAULT;                  341                 error = -EFAULT;
372                 goto err_name;                    342                 goto err_name;
373         }                                         343         }
374                                                   344 
375         /* terminating-zero may have changed a    345         /* terminating-zero may have changed after strnlen_user() returned */
376         if (name[len + MFD_NAME_PREFIX_LEN - 1    346         if (name[len + MFD_NAME_PREFIX_LEN - 1]) {
377                 error = -EFAULT;                  347                 error = -EFAULT;
378                 goto err_name;                    348                 goto err_name;
379         }                                         349         }
380                                                   350 
381         fd = get_unused_fd_flags((flags & MFD_    351         fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0);
382         if (fd < 0) {                             352         if (fd < 0) {
383                 error = fd;                       353                 error = fd;
384                 goto err_name;                    354                 goto err_name;
385         }                                         355         }
386                                                   356 
387         if (flags & MFD_HUGETLB) {                357         if (flags & MFD_HUGETLB) {
388                 file = hugetlb_file_setup(name    358                 file = hugetlb_file_setup(name, 0, VM_NORESERVE,
389                                         HUGETL    359                                         HUGETLB_ANONHUGE_INODE,
390                                         (flags    360                                         (flags >> MFD_HUGE_SHIFT) &
391                                         MFD_HU    361                                         MFD_HUGE_MASK);
392         } else                                    362         } else
393                 file = shmem_file_setup(name,     363                 file = shmem_file_setup(name, 0, VM_NORESERVE);
394         if (IS_ERR(file)) {                       364         if (IS_ERR(file)) {
395                 error = PTR_ERR(file);            365                 error = PTR_ERR(file);
396                 goto err_fd;                      366                 goto err_fd;
397         }                                         367         }
398         file->f_mode |= FMODE_LSEEK | FMODE_PR    368         file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
399         file->f_flags |= O_LARGEFILE;             369         file->f_flags |= O_LARGEFILE;
400                                                   370 
401         if (flags & MFD_NOEXEC_SEAL) {            371         if (flags & MFD_NOEXEC_SEAL) {
402                 struct inode *inode = file_ino    372                 struct inode *inode = file_inode(file);
403                                                   373 
404                 inode->i_mode &= ~0111;           374                 inode->i_mode &= ~0111;
405                 file_seals = memfd_file_seals_    375                 file_seals = memfd_file_seals_ptr(file);
406                 if (file_seals) {                 376                 if (file_seals) {
407                         *file_seals &= ~F_SEAL    377                         *file_seals &= ~F_SEAL_SEAL;
408                         *file_seals |= F_SEAL_    378                         *file_seals |= F_SEAL_EXEC;
409                 }                                 379                 }
410         } else if (flags & MFD_ALLOW_SEALING)     380         } else if (flags & MFD_ALLOW_SEALING) {
411                 /* MFD_EXEC and MFD_ALLOW_SEAL    381                 /* MFD_EXEC and MFD_ALLOW_SEALING are set */
412                 file_seals = memfd_file_seals_    382                 file_seals = memfd_file_seals_ptr(file);
413                 if (file_seals)                   383                 if (file_seals)
414                         *file_seals &= ~F_SEAL    384                         *file_seals &= ~F_SEAL_SEAL;
415         }                                         385         }
416                                                   386 
417         fd_install(fd, file);                     387         fd_install(fd, file);
418         kfree(name);                              388         kfree(name);
419         return fd;                                389         return fd;
420                                                   390 
421 err_fd:                                           391 err_fd:
422         put_unused_fd(fd);                        392         put_unused_fd(fd);
423 err_name:                                         393 err_name:
424         kfree(name);                              394         kfree(name);
425         return error;                             395         return error;
426 }                                                 396 }
427                                                   397 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php