
TOMOYO Linux Cross Reference
Linux/kernel/bpf/arena.c


Diff markup

Differences between /kernel/bpf/arena.c (Version linux-6.12-rc7) and /kernel/bpf/arena.c (Version ccs-tools-1.8.12). The file does not exist in the ccs-tools-1.8.12 tree, so the linux-6.12-rc7 source is shown in full below.


// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <linux/btf_ids.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>

/*
 * bpf_arena is a sparsely populated shared memory region between bpf program and
 * user space process.
 *
 * For example on x86-64 the values could be:
 * user_vm_start 7f7d26200000     // picked by mmap()
 * kern_vm_start ffffc90001e69000 // picked by get_vm_area()
 * For user space all pointers within the arena are normal 8-byte addresses.
 * In this example 7f7d26200000 is the address of the first page (pgoff=0).
 * The bpf program will access it as: kern_vm_start + lower_32bit_of_user_ptr
 * (u32)7f7d26200000 -> 26200000
 * hence
 * ffffc90001e69000 + 26200000 == ffffc90028069000 is the pgoff=0 page in the 4Gb
 * kernel memory region.
 *
 * BPF JITs generate the following code to access arena:
 *   mov eax, eax  // eax has lower 32-bit of user pointer
 *   mov word ptr [rax + r12 + off], bx
 * where r12 == kern_vm_start and off is s16.
 * Hence allocate 4Gb + GUARD_SZ/2 on each side.
 *
 * Initially kernel vm_area and user vma are not populated.
 * User space can fault-in any address which will insert the page
 * into kernel and user vma.
 * bpf program can allocate a page via bpf_arena_alloc_pages() kfunc
 * which will insert it into kernel vm_area.
 * The later fault-in from user space will populate that page into user vma.
 */
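
As a hedged illustration (not part of the kernel file), the address translation described in the comment above can be reproduced in plain C with the example values it uses:

/* Illustrative sketch only: the arena address arithmetic from the comment
 * above, using its (hypothetical) example values.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t user_vm_start = 0x7f7d26200000ULL;     /* picked by mmap() */
        uint64_t kern_vm_start = 0xffffc90001e69000ULL; /* picked by get_vm_area() */
        uint64_t user_ptr = user_vm_start;              /* pgoff=0 page of the arena */

        uint32_t lo32 = (uint32_t)user_ptr;             /* 0x26200000 */
        uint64_t kern_addr = kern_vm_start + lo32;      /* 0xffffc90028069000 */

        printf("bpf-side address: %#llx\n", (unsigned long long)kern_addr);
        return 0;
}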

/* number of bytes addressable by LDX/STX insn with 16-bit 'off' field */
#define GUARD_SZ (1ull << sizeof_field(struct bpf_insn, off) * 8)
#define KERN_VM_SZ (SZ_4G + GUARD_SZ)

struct bpf_arena {
        struct bpf_map map;
        u64 user_vm_start;
        u64 user_vm_end;
        struct vm_struct *kern_vm;
        struct maple_tree mt;
        struct list_head vma_list;
        struct mutex lock;
};

u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena)
{
        return arena ? (u64) (long) arena->kern_vm->addr + GUARD_SZ / 2 : 0;
}

u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena)
{
        return arena ? arena->user_vm_start : 0;
}

static long arena_map_peek_elem(struct bpf_map *map, void *value)
{
        return -EOPNOTSUPP;
}

static long arena_map_push_elem(struct bpf_map *map, void *value, u64 flags)
{
        return -EOPNOTSUPP;
}

static long arena_map_pop_elem(struct bpf_map *map, void *value)
{
        return -EOPNOTSUPP;
}

static long arena_map_delete_elem(struct bpf_map *map, void *value)
{
        return -EOPNOTSUPP;
}

static int arena_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
        return -EOPNOTSUPP;
}

static long compute_pgoff(struct bpf_arena *arena, long uaddr)
{
        return (u32)(uaddr - (u32)arena->user_vm_start) >> PAGE_SHIFT;
}

static struct bpf_map *arena_map_alloc(union bpf_attr *attr)
{
        struct vm_struct *kern_vm;
        int numa_node = bpf_map_attr_numa_node(attr);
        struct bpf_arena *arena;
        u64 vm_range;
        int err = -ENOMEM;

        if (attr->key_size || attr->value_size || attr->max_entries == 0 ||
            /* BPF_F_MMAPABLE must be set */
            !(attr->map_flags & BPF_F_MMAPABLE) ||
            /* No unsupported flags present */
            (attr->map_flags & ~(BPF_F_SEGV_ON_FAULT | BPF_F_MMAPABLE | BPF_F_NO_USER_CONV)))
                return ERR_PTR(-EINVAL);

        if (attr->map_extra & ~PAGE_MASK)
                /* If non-zero the map_extra is an expected user VMA start address */
                return ERR_PTR(-EINVAL);

        vm_range = (u64)attr->max_entries * PAGE_SIZE;
        if (vm_range > SZ_4G)
                return ERR_PTR(-E2BIG);

        if ((attr->map_extra >> 32) != ((attr->map_extra + vm_range - 1) >> 32))
                /* user vma must not cross 32-bit boundary */
                return ERR_PTR(-ERANGE);

        kern_vm = get_vm_area(KERN_VM_SZ, VM_SPARSE | VM_USERMAP);
        if (!kern_vm)
                return ERR_PTR(-ENOMEM);

        arena = bpf_map_area_alloc(sizeof(*arena), numa_node);
        if (!arena)
                goto err;

        arena->kern_vm = kern_vm;
        arena->user_vm_start = attr->map_extra;
        if (arena->user_vm_start)
                arena->user_vm_end = arena->user_vm_start + vm_range;

        INIT_LIST_HEAD(&arena->vma_list);
        bpf_map_init_from_attr(&arena->map, attr);
        mt_init_flags(&arena->mt, MT_FLAGS_ALLOC_RANGE);
        mutex_init(&arena->lock);

        return &arena->map;
err:
        free_vm_area(kern_vm);
        return ERR_PTR(err);
}
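
For context, here is a hedged user-space sketch of creating an arena map that satisfies the checks in arena_map_alloc() above, using libbpf's bpf_map_create(). The map name, page count, and address hint are hypothetical examples, not values taken from this file:

/* Hedged sketch: create a BPF arena map from user space with libbpf.
 * key_size/value_size must be 0, BPF_F_MMAPABLE is mandatory,
 * max_entries is the arena size in pages (max_entries * PAGE_SIZE <= 4 GB),
 * and map_extra, if non-zero, is a page-aligned user VMA start address.
 */
#include <bpf/bpf.h>
#include <linux/bpf.h>

int create_arena(void)
{
        LIBBPF_OPTS(bpf_map_create_opts, opts,
                    .map_flags = BPF_F_MMAPABLE,
                    .map_extra = 0 /* or a page-aligned hint, e.g. 0x100000000ULL */);

        /* 1000 pages of arena space; key and value sizes must be zero */
        return bpf_map_create(BPF_MAP_TYPE_ARENA, "arena", 0, 0, 1000, &opts);
}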

static int existing_page_cb(pte_t *ptep, unsigned long addr, void *data)
{
        struct page *page;
        pte_t pte;

        pte = ptep_get(ptep);
        if (!pte_present(pte)) /* sanity check */
                return 0;
        page = pte_page(pte);
        /*
         * We do not update pte here:
         * 1. Nobody should be accessing bpf_arena's page at this point.
         * 2. TLB flushing is batched or deferred. Even if we clear pte,
         * the TLB entries can stick around and continue to permit access to
         * the freed page. So it all relies on 1.
         */
        __free_page(page);
        return 0;
}

static void arena_map_free(struct bpf_map *map)
{
        struct bpf_arena *arena = container_of(map, struct bpf_arena, map);

        /*
         * Check that user vma-s are not around when bpf map is freed.
         * mmap() holds vm_file which holds bpf_map refcnt.
         * munmap() must have happened on vma followed by arena_vm_close()
         * which would clear arena->vma_list.
         */
        if (WARN_ON_ONCE(!list_empty(&arena->vma_list)))
                return;

        /*
         * free_vm_area() calls remove_vm_area() that calls free_unmap_vmap_area().
         * It unmaps everything from vmalloc area and clears pgtables.
         * Call apply_to_existing_page_range() first to find populated ptes and
         * free those pages.
         */
        apply_to_existing_page_range(&init_mm, bpf_arena_get_kern_vm_start(arena),
                                     KERN_VM_SZ - GUARD_SZ, existing_page_cb, NULL);
        free_vm_area(arena->kern_vm);
        mtree_destroy(&arena->mt);
        bpf_map_area_free(arena);
}

static void *arena_map_lookup_elem(struct bpf_map *map, void *key)
{
        return ERR_PTR(-EINVAL);
}

static long arena_map_update_elem(struct bpf_map *map, void *key,
                                  void *value, u64 flags)
{
        return -EOPNOTSUPP;
}

static int arena_map_check_btf(const struct bpf_map *map, const struct btf *btf,
                               const struct btf_type *key_type, const struct btf_type *value_type)
{
        return 0;
}

static u64 arena_map_mem_usage(const struct bpf_map *map)
{
        return 0;
}

struct vma_list {
        struct vm_area_struct *vma;
        struct list_head head;
        atomic_t mmap_count;
};

static int remember_vma(struct bpf_arena *arena, struct vm_area_struct *vma)
{
        struct vma_list *vml;

        vml = kmalloc(sizeof(*vml), GFP_KERNEL);
        if (!vml)
                return -ENOMEM;
        atomic_set(&vml->mmap_count, 1);
        vma->vm_private_data = vml;
        vml->vma = vma;
        list_add(&vml->head, &arena->vma_list);
        return 0;
}

static void arena_vm_open(struct vm_area_struct *vma)
{
        struct vma_list *vml = vma->vm_private_data;

        atomic_inc(&vml->mmap_count);
}

static void arena_vm_close(struct vm_area_struct *vma)
{
        struct bpf_map *map = vma->vm_file->private_data;
        struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
        struct vma_list *vml = vma->vm_private_data;

        if (!atomic_dec_and_test(&vml->mmap_count))
                return;
        guard(mutex)(&arena->lock);
        /* update link list under lock */
        list_del(&vml->head);
        vma->vm_private_data = NULL;
        kfree(vml);
}

#define MT_ENTRY ((void *)&arena_map_ops) /* unused. has to be valid pointer */

static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
{
        struct bpf_map *map = vmf->vma->vm_file->private_data;
        struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
        struct page *page;
        long kbase, kaddr;
        int ret;

        kbase = bpf_arena_get_kern_vm_start(arena);
        kaddr = kbase + (u32)(vmf->address);

        guard(mutex)(&arena->lock);
        page = vmalloc_to_page((void *)kaddr);
        if (page)
                /* already have a page vmap-ed */
                goto out;

        if (arena->map.map_flags & BPF_F_SEGV_ON_FAULT)
                /* User space requested to segfault when page is not allocated by bpf prog */
                return VM_FAULT_SIGSEGV;

        ret = mtree_insert(&arena->mt, vmf->pgoff, MT_ENTRY, GFP_KERNEL);
        if (ret)
                return VM_FAULT_SIGSEGV;

        /* Account into memcg of the process that created bpf_arena */
        ret = bpf_map_alloc_pages(map, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE, 1, &page);
        if (ret) {
                mtree_erase(&arena->mt, vmf->pgoff);
                return VM_FAULT_SIGSEGV;
        }

        ret = vm_area_map_pages(arena->kern_vm, kaddr, kaddr + PAGE_SIZE, &page);
        if (ret) {
                mtree_erase(&arena->mt, vmf->pgoff);
                __free_page(page);
                return VM_FAULT_SIGSEGV;
        }
out:
        page_ref_add(page, 1);
        vmf->page = page;
        return 0;
}

static const struct vm_operations_struct arena_vm_ops = {
        .open           = arena_vm_open,
        .close          = arena_vm_close,
        .fault          = arena_vm_fault,
};

static unsigned long arena_get_unmapped_area(struct file *filp, unsigned long addr,
                                             unsigned long len, unsigned long pgoff,
                                             unsigned long flags)
{
        struct bpf_map *map = filp->private_data;
        struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
        long ret;

        if (pgoff)
                return -EINVAL;
        if (len > SZ_4G)
                return -E2BIG;

        /* if user_vm_start was specified at arena creation time */
        if (arena->user_vm_start) {
                if (len > arena->user_vm_end - arena->user_vm_start)
                        return -E2BIG;
                if (len != arena->user_vm_end - arena->user_vm_start)
                        return -EINVAL;
                if (addr != arena->user_vm_start)
                        return -EINVAL;
        }

        ret = mm_get_unmapped_area(current->mm, filp, addr, len * 2, 0, flags);
        if (IS_ERR_VALUE(ret))
                return ret;
        if ((ret >> 32) == ((ret + len - 1) >> 32))
                return ret;
        if (WARN_ON_ONCE(arena->user_vm_start))
                /* checks at map creation time should prevent this */
                return -EFAULT;
        return round_up(ret, SZ_4G);
}

static int arena_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
{
        struct bpf_arena *arena = container_of(map, struct bpf_arena, map);

        guard(mutex)(&arena->lock);
        if (arena->user_vm_start && arena->user_vm_start != vma->vm_start)
                /*
                 * If map_extra was not specified at map creation time then
                 * 1st user process can do mmap(NULL, ...) to pick user_vm_start
                 * 2nd user process must pass the same addr to mmap(addr, MAP_FIXED..)
                 *   or
                 * specify addr in map_extra at map creation time and
                 * use the same addr later with mmap(addr, MAP_FIXED..)
                 */
                return -EBUSY;

        if (arena->user_vm_end && arena->user_vm_end != vma->vm_end)
                /* all user processes must have the same size of mmap-ed region */
                return -EBUSY;

        /* Earlier checks should prevent this */
        if (WARN_ON_ONCE(vma->vm_end - vma->vm_start > SZ_4G || vma->vm_pgoff))
                return -EFAULT;

        if (remember_vma(arena, vma))
                return -ENOMEM;

        arena->user_vm_start = vma->vm_start;
        arena->user_vm_end = vma->vm_end;
        /*
         * bpf_map_mmap() checks that it's being mmaped as VM_SHARED and
         * clears VM_MAYEXEC. Set VM_DONTEXPAND as well to avoid
         * potential change of user_vm_start.
         */
        vm_flags_set(vma, VM_DONTEXPAND);
        vma->vm_ops = &arena_vm_ops;
        return 0;
}
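
A hedged user-space sketch of the mmap path these checks govern: the whole arena is mapped with one MAP_SHARED mmap() of the map fd, the length must match the arena size, and when map_extra was set the address must match it. Page touches then go through arena_vm_fault(). Helper name and parameters below are hypothetical:

#include <stddef.h>
#include <sys/mman.h>

/* Hedged sketch: mmap a previously created arena map fd into this process.
 * 'len' must equal max_entries * page_size; 'fixed_addr' is only needed
 * when the map was created with map_extra (otherwise pass NULL).
 */
static void *map_arena(int map_fd, size_t len, void *fixed_addr)
{
        int flags = MAP_SHARED | (fixed_addr ? MAP_FIXED : 0);
        void *base = mmap(fixed_addr, len, PROT_READ | PROT_WRITE, flags, map_fd, 0);

        if (base == MAP_FAILED)
                return NULL;
        /* Touching a page of 'base' triggers arena_vm_fault(); with
         * BPF_F_SEGV_ON_FAULT the touch SIGSEGVs unless the bpf program
         * allocated that page first.
         */
        return base;
}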

static int arena_map_direct_value_addr(const struct bpf_map *map, u64 *imm, u32 off)
{
        struct bpf_arena *arena = container_of(map, struct bpf_arena, map);

        if ((u64)off > arena->user_vm_end - arena->user_vm_start)
                return -ERANGE;
        *imm = (unsigned long)arena->user_vm_start;
        return 0;
}

BTF_ID_LIST_SINGLE(bpf_arena_map_btf_ids, struct, bpf_arena)
const struct bpf_map_ops arena_map_ops = {
        .map_meta_equal = bpf_map_meta_equal,
        .map_alloc = arena_map_alloc,
        .map_free = arena_map_free,
        .map_direct_value_addr = arena_map_direct_value_addr,
        .map_mmap = arena_map_mmap,
        .map_get_unmapped_area = arena_get_unmapped_area,
        .map_get_next_key = arena_map_get_next_key,
        .map_push_elem = arena_map_push_elem,
        .map_peek_elem = arena_map_peek_elem,
        .map_pop_elem = arena_map_pop_elem,
        .map_lookup_elem = arena_map_lookup_elem,
        .map_update_elem = arena_map_update_elem,
        .map_delete_elem = arena_map_delete_elem,
        .map_check_btf = arena_map_check_btf,
        .map_mem_usage = arena_map_mem_usage,
        .map_btf_id = &bpf_arena_map_btf_ids[0],
};

static u64 clear_lo32(u64 val)
{
        /* keep the upper 32 bits of 'val', clear the lower 32 */
        return val & ~(u64)~0U;
}

/*
 * Allocate pages and vmap them into kernel vmalloc area.
 * Later the pages will be mmaped into user space vma.
 */
static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt, int node_id)
{
        /* user_vm_end/start are fixed before bpf prog runs */
        long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
        u64 kern_vm_start = bpf_arena_get_kern_vm_start(arena);
        struct page **pages;
        long pgoff = 0;
        u32 uaddr32;
        int ret, i;

        if (page_cnt > page_cnt_max)
                return 0;

        if (uaddr) {
                if (uaddr & ~PAGE_MASK)
                        return 0;
                pgoff = compute_pgoff(arena, uaddr);
                if (pgoff > page_cnt_max - page_cnt)
                        /* requested address will be outside of user vma */
                        return 0;
        }

        /* zeroing is needed, since alloc_pages_bulk_array() only fills in non-zero entries */
        pages = kvcalloc(page_cnt, sizeof(struct page *), GFP_KERNEL);
        if (!pages)
                return 0;

        guard(mutex)(&arena->lock);

        if (uaddr)
                ret = mtree_insert_range(&arena->mt, pgoff, pgoff + page_cnt - 1,
                                         MT_ENTRY, GFP_KERNEL);
        else
                ret = mtree_alloc_range(&arena->mt, &pgoff, MT_ENTRY,
                                        page_cnt, 0, page_cnt_max - 1, GFP_KERNEL);
        if (ret)
                goto out_free_pages;

        ret = bpf_map_alloc_pages(&arena->map, GFP_KERNEL | __GFP_ZERO,
                                  node_id, page_cnt, pages);
        if (ret)
                goto out;

        uaddr32 = (u32)(arena->user_vm_start + pgoff * PAGE_SIZE);
        /* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
         * will not overflow 32-bit. Lower 32-bit need to represent
         * contiguous user address range.
         * Map these pages at kern_vm_start base.
         * kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
         * lower 32-bit and it's ok.
         */
        ret = vm_area_map_pages(arena->kern_vm, kern_vm_start + uaddr32,
                                kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE, pages);
        if (ret) {
                for (i = 0; i < page_cnt; i++)
                        __free_page(pages[i]);
                goto out;
        }
        kvfree(pages);
        return clear_lo32(arena->user_vm_start) + uaddr32;
out:
        mtree_erase(&arena->mt, pgoff);
out_free_pages:
        kvfree(pages);
        return 0;
}
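
A hedged sketch of the return value assembled above: clear_lo32() keeps the upper 32 bits of user_vm_start while uaddr32 supplies the lower 32 bits, so the bpf program receives a pointer that is also valid in the user address space. Values and the helper name are hypothetical:

/* Illustrative sketch only: how the returned arena pointer is built from
 * user_vm_start and the chosen page slot (hypothetical values).
 */
#include <stdint.h>

uint64_t arena_ret_example(void)
{
        uint64_t user_vm_start = 0x7f7d26200000ULL; /* example from the top comment */
        uint64_t pgoff = 3;                         /* slot chosen by the maple tree */
        uint64_t page_size = 4096;

        uint32_t uaddr32 = (uint32_t)(user_vm_start + pgoff * page_size);

        /* equals user_vm_start + pgoff * page_size here, because map creation
         * guarantees the arena never crosses a 32-bit boundary
         */
        return (user_vm_start & ~(uint64_t)~0U) + uaddr32;
}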

/*
 * If page is present in vmalloc area, unmap it from vmalloc area,
 * unmap it from all user space vma-s,
 * and free it.
 */
static void zap_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
{
        struct vma_list *vml;

        list_for_each_entry(vml, &arena->vma_list, head)
                zap_page_range_single(vml->vma, uaddr,
                                      PAGE_SIZE * page_cnt, NULL);
}

static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
{
        u64 full_uaddr, uaddr_end;
        long kaddr, pgoff, i;
        struct page *page;

        /* only aligned lower 32-bit are relevant */
        uaddr = (u32)uaddr;
        uaddr &= PAGE_MASK;
        full_uaddr = clear_lo32(arena->user_vm_start) + uaddr;
        uaddr_end = min(arena->user_vm_end, full_uaddr + (page_cnt << PAGE_SHIFT));
        if (full_uaddr >= uaddr_end)
                return;

        page_cnt = (uaddr_end - full_uaddr) >> PAGE_SHIFT;

        guard(mutex)(&arena->lock);

        pgoff = compute_pgoff(arena, uaddr);
        /* clear range */
        mtree_store_range(&arena->mt, pgoff, pgoff + page_cnt - 1, NULL, GFP_KERNEL);

        if (page_cnt > 1)
                /* bulk zap if multiple pages are being freed */
                zap_pages(arena, full_uaddr, page_cnt);

        kaddr = bpf_arena_get_kern_vm_start(arena) + uaddr;
        for (i = 0; i < page_cnt; i++, kaddr += PAGE_SIZE, full_uaddr += PAGE_SIZE) {
                page = vmalloc_to_page((void *)kaddr);
                if (!page)
                        continue;
                if (page_cnt == 1 && page_mapped(page)) /* mapped by some user process */
                        /* Optimization for the common case of page_cnt==1:
                         * If page wasn't mapped by user space, there
                         * is no need to call zap_pages which is slow. When
                         * page_cnt is big it's faster to do the batched zap.
                         */
                        zap_pages(arena, full_uaddr, 1);
                vm_area_unmap_pages(arena->kern_vm, kaddr, kaddr + PAGE_SIZE);
                __free_page(page);
        }
}

__bpf_kfunc_start_defs();

__bpf_kfunc void *bpf_arena_alloc_pages(void *p__map, void *addr__ign, u32 page_cnt,
                                        int node_id, u64 flags)
{
        struct bpf_map *map = p__map;
        struct bpf_arena *arena = container_of(map, struct bpf_arena, map);

        if (map->map_type != BPF_MAP_TYPE_ARENA || flags || !page_cnt)
                return NULL;

        return (void *)arena_alloc_pages(arena, (long)addr__ign, page_cnt, node_id);
}

__bpf_kfunc void bpf_arena_free_pages(void *p__map, void *ptr__ign, u32 page_cnt)
{
        struct bpf_map *map = p__map;
        struct bpf_arena *arena = container_of(map, struct bpf_arena, map);

        if (map->map_type != BPF_MAP_TYPE_ARENA || !page_cnt || !ptr__ign)
                return;
        arena_free_pages(arena, (long)ptr__ign, page_cnt);
}
__bpf_kfunc_end_defs();

BTF_KFUNCS_START(arena_kfuncs)
BTF_ID_FLAGS(func, bpf_arena_alloc_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_arena_free_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE)
BTF_KFUNCS_END(arena_kfuncs)

static const struct btf_kfunc_id_set common_kfunc_set = {
        .owner = THIS_MODULE,
        .set   = &arena_kfuncs,
};

static int __init kfunc_init(void)
{
        return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set);
}
late_initcall(kfunc_init);

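To close the loop, a hedged sketch of the BPF-program side that the kfuncs registered above expose. The map name, section names, and extern declarations follow the style of the kernel selftests and are illustrative assumptions, not taken from this file:

/* Illustrative BPF program sketch (not part of this file): allocate and free
 * arena pages via the arena kfuncs. Declarations mirror the selftests'
 * conventions and may differ in your environment.
 */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

void *bpf_arena_alloc_pages(void *map, void *addr, __u32 page_cnt,
                            int node_id, __u64 flags) __ksym;
void bpf_arena_free_pages(void *map, void *ptr, __u32 page_cnt) __ksym;

struct {
        __uint(type, BPF_MAP_TYPE_ARENA);
        __uint(map_flags, BPF_F_MMAPABLE);
        __uint(max_entries, 100);       /* arena of 100 pages */
} arena SEC(".maps");

SEC("syscall")
int alloc_one_page(void *ctx)
{
        /* let the kernel pick the page slot; NUMA_NO_NODE == -1 */
        void *page = bpf_arena_alloc_pages(&arena, NULL, 1, -1, 0);

        if (!page)
                return 1;
        bpf_arena_free_pages(&arena, page, 1);
        return 0;
}

char _license[] SEC("license") = "GPL";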
