~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/mm/memfd.c

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /mm/memfd.c (Version linux-6.12-rc7) and /mm/memfd.c (Version linux-6.6.58)


** Warning: Cannot open xref database.

  1 /*                                                  1 
  2  * memfd_create system call and file sealing s    
  3  *                                                
  4  * Code was originally included in shmem.c, an    
  5  * use by hugetlbfs as well as tmpfs.             
  6  *                                                
  7  * This file is released under the GPL.           
  8  */                                               
  9                                                   
 10 #include <linux/fs.h>                             
 11 #include <linux/vfs.h>                            
 12 #include <linux/pagemap.h>                        
 13 #include <linux/file.h>                           
 14 #include <linux/mm.h>                             
 15 #include <linux/sched/signal.h>                   
 16 #include <linux/khugepaged.h>                     
 17 #include <linux/syscalls.h>                       
 18 #include <linux/hugetlb.h>                        
 19 #include <linux/shmem_fs.h>                       
 20 #include <linux/memfd.h>                          
 21 #include <linux/pid_namespace.h>                  
 22 #include <uapi/linux/memfd.h>                     
 23                                                   
 24 /*                                                
 25  * We need a tag: a new tag would expand every    
 26  * so reuse a tag which we firmly believe is n    
 27  * or hugetlbfs because they are memory only f    
 28  */                                               
 29 #define MEMFD_TAG_PINNED        PAGECACHE_TAG_    
 30 #define LAST_SCAN               4       /* abo    
 31                                                   
 32 static bool memfd_folio_has_extra_refs(struct     
 33 {                                                 
 34         return folio_ref_count(folio) - folio_    
 35                folio_nr_pages(folio);             
 36 }                                                 
 37                                                   
 38 static void memfd_tag_pins(struct xa_state *xa    
 39 {                                                 
 40         struct folio *folio;                      
 41         int latency = 0;                          
 42                                                   
 43         lru_add_drain();                          
 44                                                   
 45         xas_lock_irq(xas);                        
 46         xas_for_each(xas, folio, ULONG_MAX) {     
 47                 if (!xa_is_value(folio) && mem    
 48                         xas_set_mark(xas, MEMF    
 49                                                   
 50                 if (++latency < XA_CHECK_SCHED    
 51                         continue;                 
 52                 latency = 0;                      
 53                                                   
 54                 xas_pause(xas);                   
 55                 xas_unlock_irq(xas);              
 56                 cond_resched();                   
 57                 xas_lock_irq(xas);                
 58         }                                         
 59         xas_unlock_irq(xas);                      
 60 }                                                 
 61                                                   
 62 /*                                                
 63  * This is a helper function used by memfd_pin    
 64  * It is mainly called to allocate a folio in     
 65  * (memfd_pin_folios()) cannot find a folio in    
 66  * index in the mapping.                          
 67  */                                               
 68 struct folio *memfd_alloc_folio(struct file *m    
 69 {                                                 
 70 #ifdef CONFIG_HUGETLB_PAGE                        
 71         struct folio *folio;                      
 72         gfp_t gfp_mask;                           
 73         int err;                                  
 74                                                   
 75         if (is_file_hugepages(memfd)) {           
 76                 /*                                
 77                  * The folio would most likely    
 78                  * therefore, we have zone mem    
 79                  * alloc from. Also, the folio    
 80                  * amount of time, so it is no    
 81                  */                               
 82                 struct hstate *h = hstate_file    
 83                                                   
 84                 gfp_mask = htlb_alloc_mask(h);    
 85                 gfp_mask &= ~(__GFP_HIGHMEM |     
 86                 idx >>= huge_page_order(h);       
 87                                                   
 88                 folio = alloc_hugetlb_folio_re    
 89                                                   
 90                                                   
 91                                                   
 92                 if (folio) {                      
 93                         err = hugetlb_add_to_p    
 94                                                   
 95                                                   
 96                         if (err) {                
 97                                 folio_put(foli    
 98                                 return ERR_PTR    
 99                         }                         
100                         folio_unlock(folio);      
101                         return folio;             
102                 }                                 
103                 return ERR_PTR(-ENOMEM);          
104         }                                         
105 #endif                                            
106         return shmem_read_folio(memfd->f_mappi    
107 }                                                 
108                                                   
109 /*                                                
110  * Setting SEAL_WRITE requires us to verify th    
111  * via get_user_pages(), drivers might have so    
112  * user-space mappings (eg., direct-IO, AIO).     
113  * and see whether it has an elevated ref-coun    
114  * them to be dropped.                            
115  * The caller must guarantee that no new user     
116  * to those folios to avoid races.                
117  */                                               
118 static int memfd_wait_for_pins(struct address_    
119 {                                                 
120         XA_STATE(xas, &mapping->i_pages, 0);      
121         struct folio *folio;                      
122         int error, scan;                          
123                                                   
124         memfd_tag_pins(&xas);                     
125                                                   
126         error = 0;                                
127         for (scan = 0; scan <= LAST_SCAN; scan    
128                 int latency = 0;                  
129                                                   
130                 if (!xas_marked(&xas, MEMFD_TA    
131                         break;                    
132                                                   
133                 if (!scan)                        
134                         lru_add_drain_all();      
135                 else if (schedule_timeout_kill    
136                         scan = LAST_SCAN;         
137                                                   
138                 xas_set(&xas, 0);                 
139                 xas_lock_irq(&xas);               
140                 xas_for_each_marked(&xas, foli    
141                         bool clear = true;        
142                                                   
143                         if (!xa_is_value(folio    
144                             memfd_folio_has_ex    
145                                 /*                
146                                  * On the last    
147                                  * we inserted    
148                                  * found folio    
149                                  */               
150                                 if (scan == LA    
151                                         error     
152                                 else              
153                                         clear     
154                         }                         
155                         if (clear)                
156                                 xas_clear_mark    
157                                                   
158                         if (++latency < XA_CHE    
159                                 continue;         
160                         latency = 0;              
161                                                   
162                         xas_pause(&xas);          
163                         xas_unlock_irq(&xas);     
164                         cond_resched();           
165                         xas_lock_irq(&xas);       
166                 }                                 
167                 xas_unlock_irq(&xas);             
168         }                                         
169                                                   
170         return error;                             
171 }                                                 
172                                                   
173 static unsigned int *memfd_file_seals_ptr(stru    
174 {                                                 
175         if (shmem_file(file))                     
176                 return &SHMEM_I(file_inode(fil    
177                                                   
178 #ifdef CONFIG_HUGETLBFS                           
179         if (is_file_hugepages(file))              
180                 return &HUGETLBFS_I(file_inode    
181 #endif                                            
182                                                   
183         return NULL;                              
184 }                                                 
185                                                   
186 #define F_ALL_SEALS (F_SEAL_SEAL | \              
187                      F_SEAL_EXEC | \              
188                      F_SEAL_SHRINK | \            
189                      F_SEAL_GROW | \              
190                      F_SEAL_WRITE | \             
191                      F_SEAL_FUTURE_WRITE)         
192                                                   
193 static int memfd_add_seals(struct file *file,     
194 {                                                 
195         struct inode *inode = file_inode(file)    
196         unsigned int *file_seals;                 
197         int error;                                
198                                                   
199         /*                                        
200          * SEALING                                
201          * Sealing allows multiple parties to     
202          * but restrict access to a specific s    
203          * can only be added, but never remove    
204          * parties can share common memory reg    
205          * A malicious peer can thus never per    
206          * shared object.                         
207          *                                        
208          * Seals are only supported on special    
209          * always affect the whole underlying     
210          * may prevent some kinds of access to    
211          * following seals are defined:           
212          *   SEAL_SEAL: Prevent further seals     
213          *   SEAL_SHRINK: Prevent the file fro    
214          *   SEAL_GROW: Prevent the file from     
215          *   SEAL_WRITE: Prevent write access     
216          *   SEAL_EXEC: Prevent modification o    
217          *                                        
218          * As we don't require any trust relat    
219          * must prevent seals from being remov    
220          * only adds a given set of seals to t    
221          * existing seals. Furthermore, the "s    
222          * sealed itself, which basically prev    
223          * added.                                 
224          *                                        
225          * Semantics of sealing are only defin    
226          * anonymous tmpfs and hugetlbfs files    
227          * importantly, seals are never writte    
228          * no plan to support it on other file    
229          */                                       
230                                                   
231         if (!(file->f_mode & FMODE_WRITE))        
232                 return -EPERM;                    
233         if (seals & ~(unsigned int)F_ALL_SEALS    
234                 return -EINVAL;                   
235                                                   
236         inode_lock(inode);                        
237                                                   
238         file_seals = memfd_file_seals_ptr(file    
239         if (!file_seals) {                        
240                 error = -EINVAL;                  
241                 goto unlock;                      
242         }                                         
243                                                   
244         if (*file_seals & F_SEAL_SEAL) {          
245                 error = -EPERM;                   
246                 goto unlock;                      
247         }                                         
248                                                   
249         if ((seals & F_SEAL_WRITE) && !(*file_    
250                 error = mapping_deny_writable(    
251                 if (error)                        
252                         goto unlock;              
253                                                   
254                 error = memfd_wait_for_pins(fi    
255                 if (error) {                      
256                         mapping_allow_writable    
257                         goto unlock;              
258                 }                                 
259         }                                         
260                                                   
261         /*                                        
262          * SEAL_EXEC implies SEAL_WRITE, making   
263          */                                       
264         if (seals & F_SEAL_EXEC && inode->i_mo    
265                 seals |= F_SEAL_SHRINK|F_SEAL_    
266                                                   
267         *file_seals |= seals;                     
268         error = 0;                                
269                                                   
270 unlock:                                           
271         inode_unlock(inode);                      
272         return error;                             
273 }                                                 
274                                                   
275 static int memfd_get_seals(struct file *file)     
276 {                                                 
277         unsigned int *seals = memfd_file_seals    
278                                                   
279         return seals ? *seals : -EINVAL;          
280 }                                                 
281                                                   
282 long memfd_fcntl(struct file *file, unsigned i    
283 {                                                 
284         long error;                               
285                                                   
286         switch (cmd) {                            
287         case F_ADD_SEALS:                         
288                 error = memfd_add_seals(file,     
289                 break;                            
290         case F_GET_SEALS:                         
291                 error = memfd_get_seals(file);    
292                 break;                            
293         default:                                  
294                 error = -EINVAL;                  
295                 break;                            
296         }                                         
297                                                   
298         return error;                             
299 }                                                 
300                                                   
301 #define MFD_NAME_PREFIX "memfd:"                  
302 #define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_P    
303 #define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_    
304                                                   
305 #define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW    
306                                                   
307 static int check_sysctl_memfd_noexec(unsigned     
308 {                                                 
309 #ifdef CONFIG_SYSCTL                              
310         struct pid_namespace *ns = task_active    
311         int sysctl = pidns_memfd_noexec_scope(    
312                                                   
313         if (!(*flags & (MFD_EXEC | MFD_NOEXEC_    
314                 if (sysctl >= MEMFD_NOEXEC_SCO    
315                         *flags |= MFD_NOEXEC_S    
316                 else                              
317                         *flags |= MFD_EXEC;       
318         }                                         
319                                                   
320         if (!(*flags & MFD_NOEXEC_SEAL) && sys    
321                 pr_err_ratelimited(               
322                         "%s[%d]: memfd_create(    
323                         current->comm, task_pi    
324                 return -EACCES;                   
325         }                                         
326 #endif                                            
327         return 0;                                 
328 }                                                 
329                                                   
330 SYSCALL_DEFINE2(memfd_create,                     
331                 const char __user *, uname,       
332                 unsigned int, flags)              
333 {                                                 
334         unsigned int *file_seals;                 
335         struct file *file;                        
336         int fd, error;                            
337         char *name;                               
338         long len;                                 
339                                                   
340         if (!(flags & MFD_HUGETLB)) {             
341                 if (flags & ~(unsigned int)MFD    
342                         return -EINVAL;           
343         } else {                                  
344                 /* Allow huge page size encodi    
345                 if (flags & ~(unsigned int)(MF    
346                                 (MFD_HUGE_MASK    
347                         return -EINVAL;           
348         }                                         
349                                                   
350         /* Invalid if both EXEC and NOEXEC_SEA    
351         if ((flags & MFD_EXEC) && (flags & MFD    
352                 return -EINVAL;                   
353                                                   
354         error = check_sysctl_memfd_noexec(&fla    
355         if (error < 0)                            
356                 return error;                     
357                                                   
358         /* length includes terminating zero */    
359         len = strnlen_user(uname, MFD_NAME_MAX    
360         if (len <= 0)                             
361                 return -EFAULT;                   
362         if (len > MFD_NAME_MAX_LEN + 1)           
363                 return -EINVAL;                   
364                                                   
365         name = kmalloc(len + MFD_NAME_PREFIX_L    
366         if (!name)                                
367                 return -ENOMEM;                   
368                                                   
369         strcpy(name, MFD_NAME_PREFIX);            
370         if (copy_from_user(&name[MFD_NAME_PREF    
371                 error = -EFAULT;                  
372                 goto err_name;                    
373         }                                         
374                                                   
375         /* terminating-zero may have changed a    
376         if (name[len + MFD_NAME_PREFIX_LEN - 1    
377                 error = -EFAULT;                  
378                 goto err_name;                    
379         }                                         
380                                                   
381         fd = get_unused_fd_flags((flags & MFD_    
382         if (fd < 0) {                             
383                 error = fd;                       
384                 goto err_name;                    
385         }                                         
386                                                   
387         if (flags & MFD_HUGETLB) {                
388                 file = hugetlb_file_setup(name    
389                                         HUGETL    
390                                         (flags    
391                                         MFD_HU    
392         } else                                    
393                 file = shmem_file_setup(name,     
394         if (IS_ERR(file)) {                       
395                 error = PTR_ERR(file);            
396                 goto err_fd;                      
397         }                                         
398         file->f_mode |= FMODE_LSEEK | FMODE_PR    
399         file->f_flags |= O_LARGEFILE;             
400                                                   
401         if (flags & MFD_NOEXEC_SEAL) {            
402                 struct inode *inode = file_ino    
403                                                   
404                 inode->i_mode &= ~0111;           
405                 file_seals = memfd_file_seals_    
406                 if (file_seals) {                 
407                         *file_seals &= ~F_SEAL    
408                         *file_seals |= F_SEAL_    
409                 }                                 
410         } else if (flags & MFD_ALLOW_SEALING)     
411                 /* MFD_EXEC and MFD_ALLOW_SEAL    
412                 file_seals = memfd_file_seals_    
413                 if (file_seals)                   
414                         *file_seals &= ~F_SEAL    
415         }                                         
416                                                   
417         fd_install(fd, file);                     
418         kfree(name);                              
419         return fd;                                
420                                                   
421 err_fd:                                           
422         put_unused_fd(fd);                        
423 err_name:                                         
424         kfree(name);                              
425         return error;                             
426 }                                                 
427                                                   

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php