Linux/kernel/bpf/arraymap.c

  1 // SPDX-License-Identifier: GPL-2.0-only
  2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
  3  * Copyright (c) 2016,2017 Facebook
  4  */
  5 #include <linux/bpf.h>
  6 #include <linux/btf.h>
  7 #include <linux/err.h>
  8 #include <linux/slab.h>
  9 #include <linux/mm.h>
 10 #include <linux/filter.h>
 11 #include <linux/perf_event.h>
 12 #include <uapi/linux/btf.h>
 13 #include <linux/rcupdate_trace.h>
 14 #include <linux/btf_ids.h>
 15 
 16 #include "map_in_map.h"
 17 
 18 #define ARRAY_CREATE_FLAG_MASK \
 19         (BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \
 20          BPF_F_PRESERVE_ELEMS | BPF_F_INNER_MAP)
 21 
 22 static void bpf_array_free_percpu(struct bpf_array *array)
 23 {
 24         int i;
 25 
 26         for (i = 0; i < array->map.max_entries; i++) {
 27                 free_percpu(array->pptrs[i]);
 28                 cond_resched();
 29         }
 30 }
 31 
 32 static int bpf_array_alloc_percpu(struct bpf_array *array)
 33 {
 34         void __percpu *ptr;
 35         int i;
 36 
 37         for (i = 0; i < array->map.max_entries; i++) {
 38                 ptr = bpf_map_alloc_percpu(&array->map, array->elem_size, 8,
 39                                            GFP_USER | __GFP_NOWARN);
 40                 if (!ptr) {
 41                         bpf_array_free_percpu(array);
 42                         return -ENOMEM;
 43                 }
 44                 array->pptrs[i] = ptr;
 45                 cond_resched();
 46         }
 47 
 48         return 0;
 49 }
 50 
 51 /* Called from syscall */
 52 int array_map_alloc_check(union bpf_attr *attr)
 53 {
 54         bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
 55         int numa_node = bpf_map_attr_numa_node(attr);
 56 
 57         /* check sanity of attributes */
 58         if (attr->max_entries == 0 || attr->key_size != 4 ||
 59             attr->value_size == 0 ||
 60             attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
 61             !bpf_map_flags_access_ok(attr->map_flags) ||
 62             (percpu && numa_node != NUMA_NO_NODE))
 63                 return -EINVAL;
 64 
 65         if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
 66             attr->map_flags & (BPF_F_MMAPABLE | BPF_F_INNER_MAP))
 67                 return -EINVAL;
 68 
 69         if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
 70             attr->map_flags & BPF_F_PRESERVE_ELEMS)
 71                 return -EINVAL;
 72 
 73         /* avoid overflow on round_up(map->value_size) */
 74         if (attr->value_size > INT_MAX)
 75                 return -E2BIG;
 76 
 77         return 0;
 78 }
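
As a rough user-space illustration of array_map_alloc_check() (a sketch, assuming libbpf 0.7+ and sufficient privileges such as CAP_BPF or root; error reporting details vary by libbpf version), an array map with a key size other than 4 bytes is rejected, while a 4-byte key succeeds:

/* sketch: exercising the array map creation checks from user space via libbpf */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <linux/bpf.h>

int main(void)
{
	int fd;

	/* key_size must be 4 for BPF_MAP_TYPE_ARRAY: expected to fail with EINVAL */
	fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "bad_key", 8, 8, 16, NULL);
	if (fd < 0)
		printf("key_size=8 rejected: %s\n", strerror(errno));
	else
		close(fd);

	/* 4-byte key, non-zero value_size and max_entries: expected to succeed */
	fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "good_key", 4, 8, 16, NULL);
	if (fd < 0) {
		printf("map creation failed: %s\n", strerror(errno));
		return 1;
	}
	printf("array map created, fd=%d\n", fd);
	close(fd);
	return 0;
}
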
 79 
 80 static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 81 {
 82         bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
 83         int numa_node = bpf_map_attr_numa_node(attr);
 84         u32 elem_size, index_mask, max_entries;
 85         bool bypass_spec_v1 = bpf_bypass_spec_v1(NULL);
 86         u64 array_size, mask64;
 87         struct bpf_array *array;
 88 
 89         elem_size = round_up(attr->value_size, 8);
 90 
 91         max_entries = attr->max_entries;
 92 
 93         /* On 32 bit archs roundup_pow_of_two() with max_entries that has
 94          * the uppermost bit set in u32 space is undefined behavior due to
 95          * the resulting 1U << 32, so do it manually here in u64 space.
 96          */
 97         mask64 = fls_long(max_entries - 1);
 98         mask64 = 1ULL << mask64;
 99         mask64 -= 1;
100 
101         index_mask = mask64;
102         if (!bypass_spec_v1) {
103                 /* round up array size to nearest power of 2,
104                  * since cpu will speculate within index_mask limits
105                  */
106                 max_entries = index_mask + 1;
107                 /* Check for overflows. */
108                 if (max_entries < attr->max_entries)
109                         return ERR_PTR(-E2BIG);
110         }
111 
112         array_size = sizeof(*array);
113         if (percpu) {
114                 array_size += (u64) max_entries * sizeof(void *);
115         } else {
116                 /* rely on vmalloc() to return page-aligned memory and
117                  * ensure array->value is exactly page-aligned
118                  */
119                 if (attr->map_flags & BPF_F_MMAPABLE) {
120                         array_size = PAGE_ALIGN(array_size);
121                         array_size += PAGE_ALIGN((u64) max_entries * elem_size);
122                 } else {
123                         array_size += (u64) max_entries * elem_size;
124                 }
125         }
126 
127         /* allocate all map elements and zero-initialize them */
128         if (attr->map_flags & BPF_F_MMAPABLE) {
129                 void *data;
130 
131                 /* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */
132                 data = bpf_map_area_mmapable_alloc(array_size, numa_node);
133                 if (!data)
134                         return ERR_PTR(-ENOMEM);
135                 array = data + PAGE_ALIGN(sizeof(struct bpf_array))
136                         - offsetof(struct bpf_array, value);
137         } else {
138                 array = bpf_map_area_alloc(array_size, numa_node);
139         }
140         if (!array)
141                 return ERR_PTR(-ENOMEM);
142         array->index_mask = index_mask;
143         array->map.bypass_spec_v1 = bypass_spec_v1;
144 
145         /* copy mandatory map attributes */
146         bpf_map_init_from_attr(&array->map, attr);
147         array->elem_size = elem_size;
148 
149         if (percpu && bpf_array_alloc_percpu(array)) {
150                 bpf_map_area_free(array);
151                 return ERR_PTR(-ENOMEM);
152         }
153 
154         return &array->map;
155 }
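
The index_mask and size arithmetic above can be mirrored in a standalone user-space program. This is only a sketch of the computation: fls64_user(), page_align() and PAGE_SZ are stand-ins for the kernel's fls_long(), PAGE_ALIGN() and PAGE_SIZE, not real kernel APIs.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SZ 4096ULL

static unsigned int fls64_user(uint64_t x)      /* find last set bit, 1-based */
{
	unsigned int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

static uint64_t page_align(uint64_t x)
{
	return (x + PAGE_SZ - 1) & ~(PAGE_SZ - 1);
}

int main(void)
{
	uint32_t max_entries = 3000000000u;             /* uppermost u32 bit set */
	uint32_t value_size = 12;
	uint64_t elem_size = (value_size + 7) & ~7ULL;  /* round_up(value_size, 8) */
	uint64_t mask64 = (1ULL << fls64_user(max_entries - 1)) - 1;
	uint32_t index_mask = mask64;
	uint64_t rounded = mask64 + 1;  /* exceeds U32_MAX here: reported as -E2BIG */

	printf("index_mask = %#x, rounded max_entries = %llu\n",
	       index_mask, (unsigned long long)rounded);
	printf("mmapable value area = %llu bytes\n",
	       (unsigned long long)page_align((uint64_t)max_entries * elem_size));
	return 0;
}
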
156 
157 static void *array_map_elem_ptr(struct bpf_array* array, u32 index)
158 {
159         return array->value + (u64)array->elem_size * index;
160 }
161 
162 /* Called from syscall or from eBPF program */
163 static void *array_map_lookup_elem(struct bpf_map *map, void *key)
164 {
165         struct bpf_array *array = container_of(map, struct bpf_array, map);
166         u32 index = *(u32 *)key;
167 
168         if (unlikely(index >= array->map.max_entries))
169                 return NULL;
170 
171         return array->value + (u64)array->elem_size * (index & array->index_mask);
172 }
173 
174 static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
175                                        u32 off)
176 {
177         struct bpf_array *array = container_of(map, struct bpf_array, map);
178 
179         if (map->max_entries != 1)
180                 return -ENOTSUPP;
181         if (off >= map->value_size)
182                 return -EINVAL;
183 
184         *imm = (unsigned long)array->value;
185         return 0;
186 }
187 
188 static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
189                                        u32 *off)
190 {
191         struct bpf_array *array = container_of(map, struct bpf_array, map);
192         u64 base = (unsigned long)array->value;
193         u64 range = array->elem_size;
194 
195         if (map->max_entries != 1)
196                 return -ENOTSUPP;
197         if (imm < base || imm >= base + range)
198                 return -ENOENT;
199 
200         *off = imm - base;
201         return 0;
202 }
203 
204 /* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
205 static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
206 {
207         struct bpf_array *array = container_of(map, struct bpf_array, map);
208         struct bpf_insn *insn = insn_buf;
209         u32 elem_size = array->elem_size;
210         const int ret = BPF_REG_0;
211         const int map_ptr = BPF_REG_1;
212         const int index = BPF_REG_2;
213 
214         if (map->map_flags & BPF_F_INNER_MAP)
215                 return -EOPNOTSUPP;
216 
217         *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
218         *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
219         if (!map->bypass_spec_v1) {
220                 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
221                 *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
222         } else {
223                 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
224         }
225 
226         if (is_power_of_2(elem_size)) {
227                 *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
228         } else {
229                 *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
230         }
231         *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
232         *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
233         *insn++ = BPF_MOV64_IMM(ret, 0);
234         return insn - insn_buf;
235 }
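
The instruction sequence emitted above corresponds roughly to the following C, shown here as a self-contained sketch (the parameters stand in for the struct bpf_array fields; this is not kernel code):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* rough C equivalent of the inline lookup emitted by array_map_gen_lookup() */
static void *inlined_array_lookup(char *value, uint32_t max_entries,
				  uint32_t index_mask, uint32_t elem_size,
				  int bypass_spec_v1, uint32_t index)
{
	if (index >= max_entries)
		return NULL;                 /* JGE branch to the "mov r0, 0" slot */
	if (!bypass_spec_v1)
		index &= index_mask;         /* AND: clamp speculative accesses */
	/* LSH by ilog2(elem_size) when it is a power of two, MUL otherwise */
	return value + (uint64_t)index * elem_size;
}

int main(void)
{
	char storage[4 * 16] = { 0 };

	printf("elem 2 at %p\n", inlined_array_lookup(storage, 4, 3, 16, 0, 2));
	printf("elem 9 -> %p (out of range)\n",
	       inlined_array_lookup(storage, 4, 3, 16, 0, 9));
	return 0;
}
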
236 
237 /* Called from eBPF program */
238 static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
239 {
240         struct bpf_array *array = container_of(map, struct bpf_array, map);
241         u32 index = *(u32 *)key;
242 
243         if (unlikely(index >= array->map.max_entries))
244                 return NULL;
245 
246         return this_cpu_ptr(array->pptrs[index & array->index_mask]);
247 }
248 
249 /* emit BPF instructions equivalent to C code of percpu_array_map_lookup_elem() */
250 static int percpu_array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
251 {
252         struct bpf_array *array = container_of(map, struct bpf_array, map);
253         struct bpf_insn *insn = insn_buf;
254 
255         if (!bpf_jit_supports_percpu_insn())
256                 return -EOPNOTSUPP;
257 
258         if (map->map_flags & BPF_F_INNER_MAP)
259                 return -EOPNOTSUPP;
260 
261         BUILD_BUG_ON(offsetof(struct bpf_array, map) != 0);
262         *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct bpf_array, pptrs));
263 
264         *insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0);
265         if (!map->bypass_spec_v1) {
266                 *insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 6);
267                 *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_0, array->index_mask);
268         } else {
269                 *insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 5);
270         }
271 
272         *insn++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
273         *insn++ = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
274         *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0);
275         *insn++ = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
276         *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
277         *insn++ = BPF_MOV64_IMM(BPF_REG_0, 0);
278         return insn - insn_buf;
279 }
280 
281 static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
282 {
283         struct bpf_array *array = container_of(map, struct bpf_array, map);
284         u32 index = *(u32 *)key;
285 
286         if (cpu >= nr_cpu_ids)
287                 return NULL;
288 
289         if (unlikely(index >= array->map.max_entries))
290                 return NULL;
291 
292         return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu);
293 }
294 
295 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
296 {
297         struct bpf_array *array = container_of(map, struct bpf_array, map);
298         u32 index = *(u32 *)key;
299         void __percpu *pptr;
300         int cpu, off = 0;
301         u32 size;
302 
303         if (unlikely(index >= array->map.max_entries))
304                 return -ENOENT;
305 
306         /* per_cpu areas are zero-filled and bpf programs can only
307          * access 'value_size' of them, so copying rounded areas
308          * will not leak any kernel data
309          */
310         size = array->elem_size;
311         rcu_read_lock();
312         pptr = array->pptrs[index & array->index_mask];
313         for_each_possible_cpu(cpu) {
314                 copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
315                 check_and_init_map_value(map, value + off);
316                 off += size;
317         }
318         rcu_read_unlock();
319         return 0;
320 }
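
From the syscall side this means a lookup on a per-CPU array fills one round_up(value_size, 8)-sized slot per possible CPU in the user buffer. A sketch, assuming libbpf 0.7+ (for bpf_map_create() and libbpf_num_possible_cpus()) and sufficient privileges:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <linux/bpf.h>

int main(void)
{
	int ncpu = libbpf_num_possible_cpus();
	__u32 key = 0, value_size = 12;
	__u32 elem_size = (value_size + 7) & ~7u;  /* kernel copies rounded slots */
	char *buf;
	int fd, cpu;

	if (ncpu < 0)
		return 1;
	fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, "pcpu", 4, value_size, 4, NULL);
	if (fd < 0)
		return 1;

	buf = calloc(ncpu, elem_size);
	if (!buf || bpf_map_lookup_elem(fd, &key, buf))  /* one slot per possible CPU */
		return 1;
	for (cpu = 0; cpu < ncpu; cpu++)
		printf("cpu%d: first byte = %d\n", cpu, buf[(size_t)cpu * elem_size]);
	free(buf);
	close(fd);
	return 0;
}
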
321 
322 /* Called from syscall */
323 static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
324 {
325         struct bpf_array *array = container_of(map, struct bpf_array, map);
326         u32 index = key ? *(u32 *)key : U32_MAX;
327         u32 *next = (u32 *)next_key;
328 
329         if (index >= array->map.max_entries) {
330                 *next = 0;
331                 return 0;
332         }
333 
334         if (index == array->map.max_entries - 1)
335                 return -ENOENT;
336 
337         *next = index + 1;
338         return 0;
339 }
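
Get-next-key on an array therefore restarts from index 0 for any out-of-range key and ends with -ENOENT after the last index. A user-space sketch using libbpf's bpf_map_get_next_key() (libbpf 0.7+ and privileges assumed):

#include <stdio.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <linux/bpf.h>

int main(void)
{
	__u32 key, next;
	int fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "iter", 4, 8, 4, NULL);

	if (fd < 0)
		return 1;

	/* an out-of-range key restarts iteration at index 0 */
	key = 0xffffffff;
	while (!bpf_map_get_next_key(fd, &key, &next)) {
		printf("next key: %u\n", next);
		key = next;
	}
	/* the loop ends with -ENOENT once key == max_entries - 1 */
	close(fd);
	return 0;
}
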
340 
341 /* Called from syscall or from eBPF program */
342 static long array_map_update_elem(struct bpf_map *map, void *key, void *value,
343                                   u64 map_flags)
344 {
345         struct bpf_array *array = container_of(map, struct bpf_array, map);
346         u32 index = *(u32 *)key;
347         char *val;
348 
349         if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
350                 /* unknown flags */
351                 return -EINVAL;
352 
353         if (unlikely(index >= array->map.max_entries))
354                 /* all elements were pre-allocated, cannot insert a new one */
355                 return -E2BIG;
356 
357         if (unlikely(map_flags & BPF_NOEXIST))
358                 /* all elements already exist */
359                 return -EEXIST;
360 
361         if (unlikely((map_flags & BPF_F_LOCK) &&
362                      !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
363                 return -EINVAL;
364 
365         if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
366                 val = this_cpu_ptr(array->pptrs[index & array->index_mask]);
367                 copy_map_value(map, val, value);
368                 bpf_obj_free_fields(array->map.record, val);
369         } else {
370                 val = array->value +
371                         (u64)array->elem_size * (index & array->index_mask);
372                 if (map_flags & BPF_F_LOCK)
373                         copy_map_value_locked(map, val, value, false);
374                 else
375                         copy_map_value(map, val, value);
376                 bpf_obj_free_fields(array->map.record, val);
377         }
378         return 0;
379 }
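
Since every slot of an array map always exists, the flag handling above surfaces to user space as EEXIST for BPF_NOEXIST and E2BIG for an out-of-range index, while BPF_ANY and BPF_EXIST overwrite in place. A sketch (libbpf 0.7+ and privileges assumed):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <linux/bpf.h>

int main(void)
{
	__u64 val = 42;
	__u32 key = 1, bad_key = 100;
	int fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "upd", 4, sizeof(val), 4, NULL);

	if (fd < 0)
		return 1;
	/* all elements pre-exist, so BPF_NOEXIST can never succeed */
	if (bpf_map_update_elem(fd, &key, &val, BPF_NOEXIST))
		printf("BPF_NOEXIST: %s\n", strerror(errno));      /* EEXIST */
	/* no new slots can be added past max_entries */
	if (bpf_map_update_elem(fd, &bad_key, &val, BPF_ANY))
		printf("index 100: %s\n", strerror(errno));        /* E2BIG */
	/* BPF_ANY and BPF_EXIST both overwrite in place */
	printf("BPF_ANY: %d\n", bpf_map_update_elem(fd, &key, &val, BPF_ANY));
	close(fd);
	return 0;
}
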
380 
381 int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
382                             u64 map_flags)
383 {
384         struct bpf_array *array = container_of(map, struct bpf_array, map);
385         u32 index = *(u32 *)key;
386         void __percpu *pptr;
387         int cpu, off = 0;
388         u32 size;
389 
390         if (unlikely(map_flags > BPF_EXIST))
391                 /* unknown flags */
392                 return -EINVAL;
393 
394         if (unlikely(index >= array->map.max_entries))
395                 /* all elements were pre-allocated, cannot insert a new one */
396                 return -E2BIG;
397 
398         if (unlikely(map_flags == BPF_NOEXIST))
399                 /* all elements already exist */
400                 return -EEXIST;
401 
402         /* User space will provide round_up(value_size, 8) bytes that
403          * will be copied into the per-cpu area. BPF programs can only
404          * access value_size of it. During lookup the same extra bytes
405          * will be returned, or zeros which were zero-filled by
406          * percpu_alloc, so no kernel data leak is possible.
407          */
408         size = array->elem_size;
409         rcu_read_lock();
410         pptr = array->pptrs[index & array->index_mask];
411         for_each_possible_cpu(cpu) {
412                 copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off);
413                 bpf_obj_free_fields(array->map.record, per_cpu_ptr(pptr, cpu));
414                 off += size;
415         }
416         rcu_read_unlock();
417         return 0;
418 }
419 
420 /* Called from syscall or from eBPF program */
421 static long array_map_delete_elem(struct bpf_map *map, void *key)
422 {
423         return -EINVAL;
424 }
425 
426 static void *array_map_vmalloc_addr(struct bpf_array *array)
427 {
428         return (void *)round_down((unsigned long)array, PAGE_SIZE);
429 }
430 
431 static void array_map_free_timers_wq(struct bpf_map *map)
432 {
433         struct bpf_array *array = container_of(map, struct bpf_array, map);
434         int i;
435 
436         /* We don't reset or free fields other than timer and workqueue
437          * on uref dropping to zero.
438          */
439         if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE)) {
440                 for (i = 0; i < array->map.max_entries; i++) {
441                         if (btf_record_has_field(map->record, BPF_TIMER))
442                                 bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i));
443                         if (btf_record_has_field(map->record, BPF_WORKQUEUE))
444                                 bpf_obj_free_workqueue(map->record, array_map_elem_ptr(array, i));
445                 }
446         }
447 }
448 
449 /* Called when map->refcnt goes to zero, either from workqueue or from syscall */
450 static void array_map_free(struct bpf_map *map)
451 {
452         struct bpf_array *array = container_of(map, struct bpf_array, map);
453         int i;
454 
455         if (!IS_ERR_OR_NULL(map->record)) {
456                 if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
457                         for (i = 0; i < array->map.max_entries; i++) {
458                                 void __percpu *pptr = array->pptrs[i & array->index_mask];
459                                 int cpu;
460 
461                                 for_each_possible_cpu(cpu) {
462                                         bpf_obj_free_fields(map->record, per_cpu_ptr(pptr, cpu));
463                                         cond_resched();
464                                 }
465                         }
466                 } else {
467                         for (i = 0; i < array->map.max_entries; i++)
468                                 bpf_obj_free_fields(map->record, array_map_elem_ptr(array, i));
469                 }
470         }
471 
472         if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
473                 bpf_array_free_percpu(array);
474 
475         if (array->map.map_flags & BPF_F_MMAPABLE)
476                 bpf_map_area_free(array_map_vmalloc_addr(array));
477         else
478                 bpf_map_area_free(array);
479 }
480 
481 static void array_map_seq_show_elem(struct bpf_map *map, void *key,
482                                     struct seq_file *m)
483 {
484         void *value;
485 
486         rcu_read_lock();
487 
488         value = array_map_lookup_elem(map, key);
489         if (!value) {
490                 rcu_read_unlock();
491                 return;
492         }
493 
494         if (map->btf_key_type_id)
495                 seq_printf(m, "%u: ", *(u32 *)key);
496         btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
497         seq_puts(m, "\n");
498 
499         rcu_read_unlock();
500 }
501 
502 static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
503                                            struct seq_file *m)
504 {
505         struct bpf_array *array = container_of(map, struct bpf_array, map);
506         u32 index = *(u32 *)key;
507         void __percpu *pptr;
508         int cpu;
509 
510         rcu_read_lock();
511 
512         seq_printf(m, "%u: {\n", *(u32 *)key);
513         pptr = array->pptrs[index & array->index_mask];
514         for_each_possible_cpu(cpu) {
515                 seq_printf(m, "\tcpu%d: ", cpu);
516                 btf_type_seq_show(map->btf, map->btf_value_type_id,
517                                   per_cpu_ptr(pptr, cpu), m);
518                 seq_puts(m, "\n");
519         }
520         seq_puts(m, "}\n");
521 
522         rcu_read_unlock();
523 }
524 
525 static int array_map_check_btf(const struct bpf_map *map,
526                                const struct btf *btf,
527                                const struct btf_type *key_type,
528                                const struct btf_type *value_type)
529 {
530         u32 int_data;
531 
532         /* One exception for keyless BTF: .bss/.data/.rodata map */
533         if (btf_type_is_void(key_type)) {
534                 if (map->map_type != BPF_MAP_TYPE_ARRAY ||
535                     map->max_entries != 1)
536                         return -EINVAL;
537 
538                 if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC)
539                         return -EINVAL;
540 
541                 return 0;
542         }
543 
544         if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
545                 return -EINVAL;
546 
547         int_data = *(u32 *)(key_type + 1);
548         /* bpf array can only take a u32 key. This check makes sure
549          * that the btf matches the attr used during map_create.
550          */
551         if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
552                 return -EINVAL;
553 
554         return 0;
555 }
556 
557 static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
558 {
559         struct bpf_array *array = container_of(map, struct bpf_array, map);
560         pgoff_t pgoff = PAGE_ALIGN(sizeof(*array)) >> PAGE_SHIFT;
561 
562         if (!(map->map_flags & BPF_F_MMAPABLE))
563                 return -EINVAL;
564 
565         if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) >
566             PAGE_ALIGN((u64)array->map.max_entries * array->elem_size))
567                 return -EINVAL;
568 
569         return remap_vmalloc_range(vma, array_map_vmalloc_addr(array),
570                                    vma->vm_pgoff + pgoff);
571 }
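
The page-aligned layout chosen in array_map_alloc() is what makes this mmap handler work: user space can map the value area directly and bypass the lookup/update syscalls. A sketch assuming libbpf 0.7+ and CAP_BPF/root; the single-page length here relies on max_entries * value_size fitting in one page:

#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <linux/bpf.h>

int main(void)
{
	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE);
	__u32 key = 0;
	__u64 readback = 0, *vals;
	size_t len = sysconf(_SC_PAGESIZE);     /* >= max_entries * value_size */
	int fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "mmapable", 4, 8, 16, &opts);

	if (fd < 0)
		return 1;
	vals = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (vals == MAP_FAILED)
		return 1;

	vals[0] = 0xdeadbeef;                   /* write directly, no syscall */
	bpf_map_lookup_elem(fd, &key, &readback);
	printf("readback = %llx\n", (unsigned long long)readback);

	munmap(vals, len);
	close(fd);
	return 0;
}
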
572 
573 static bool array_map_meta_equal(const struct bpf_map *meta0,
574                                  const struct bpf_map *meta1)
575 {
576         if (!bpf_map_meta_equal(meta0, meta1))
577                 return false;
578         return meta0->map_flags & BPF_F_INNER_MAP ? true :
579                meta0->max_entries == meta1->max_entries;
580 }
581 
582 struct bpf_iter_seq_array_map_info {
583         struct bpf_map *map;
584         void *percpu_value_buf;
585         u32 index;
586 };
587 
588 static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
589 {
590         struct bpf_iter_seq_array_map_info *info = seq->private;
591         struct bpf_map *map = info->map;
592         struct bpf_array *array;
593         u32 index;
594 
595         if (info->index >= map->max_entries)
596                 return NULL;
597 
598         if (*pos == 0)
599                 ++*pos;
600         array = container_of(map, struct bpf_array, map);
601         index = info->index & array->index_mask;
602         if (info->percpu_value_buf)
603                 return array->pptrs[index];
604         return array_map_elem_ptr(array, index);
605 }
606 
607 static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
608 {
609         struct bpf_iter_seq_array_map_info *info = seq->private;
610         struct bpf_map *map = info->map;
611         struct bpf_array *array;
612         u32 index;
613 
614         ++*pos;
615         ++info->index;
616         if (info->index >= map->max_entries)
617                 return NULL;
618 
619         array = container_of(map, struct bpf_array, map);
620         index = info->index & array->index_mask;
621         if (info->percpu_value_buf)
622                 return array->pptrs[index];
623         return array_map_elem_ptr(array, index);
624 }
625 
626 static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
627 {
628         struct bpf_iter_seq_array_map_info *info = seq->private;
629         struct bpf_iter__bpf_map_elem ctx = {};
630         struct bpf_map *map = info->map;
631         struct bpf_array *array = container_of(map, struct bpf_array, map);
632         struct bpf_iter_meta meta;
633         struct bpf_prog *prog;
634         int off = 0, cpu = 0;
635         void __percpu **pptr;
636         u32 size;
637 
638         meta.seq = seq;
639         prog = bpf_iter_get_info(&meta, v == NULL);
640         if (!prog)
641                 return 0;
642 
643         ctx.meta = &meta;
644         ctx.map = info->map;
645         if (v) {
646                 ctx.key = &info->index;
647 
648                 if (!info->percpu_value_buf) {
649                         ctx.value = v;
650                 } else {
651                         pptr = v;
652                         size = array->elem_size;
653                         for_each_possible_cpu(cpu) {
654                                 copy_map_value_long(map, info->percpu_value_buf + off,
655                                                     per_cpu_ptr(pptr, cpu));
656                                 check_and_init_map_value(map, info->percpu_value_buf + off);
657                                 off += size;
658                         }
659                         ctx.value = info->percpu_value_buf;
660                 }
661         }
662 
663         return bpf_iter_run_prog(prog, &ctx);
664 }
665 
666 static int bpf_array_map_seq_show(struct seq_file *seq, void *v)
667 {
668         return __bpf_array_map_seq_show(seq, v);
669 }
670 
671 static void bpf_array_map_seq_stop(struct seq_file *seq, void *v)
672 {
673         if (!v)
674                 (void)__bpf_array_map_seq_show(seq, NULL);
675 }
676 
677 static int bpf_iter_init_array_map(void *priv_data,
678                                    struct bpf_iter_aux_info *aux)
679 {
680         struct bpf_iter_seq_array_map_info *seq_info = priv_data;
681         struct bpf_map *map = aux->map;
682         struct bpf_array *array = container_of(map, struct bpf_array, map);
683         void *value_buf;
684         u32 buf_size;
685 
686         if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
687                 buf_size = array->elem_size * num_possible_cpus();
688                 value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
689                 if (!value_buf)
690                         return -ENOMEM;
691 
692                 seq_info->percpu_value_buf = value_buf;
693         }
694 
695         /* bpf_iter_attach_map() acquires a map uref, and the uref may be
696          * released before or in the middle of iterating map elements, so
697          * acquire an extra map uref for the iterator.
698          */
699         bpf_map_inc_with_uref(map);
700         seq_info->map = map;
701         return 0;
702 }
703 
704 static void bpf_iter_fini_array_map(void *priv_data)
705 {
706         struct bpf_iter_seq_array_map_info *seq_info = priv_data;
707 
708         bpf_map_put_with_uref(seq_info->map);
709         kfree(seq_info->percpu_value_buf);
710 }
711 
712 static const struct seq_operations bpf_array_map_seq_ops = {
713         .start  = bpf_array_map_seq_start,
714         .next   = bpf_array_map_seq_next,
715         .stop   = bpf_array_map_seq_stop,
716         .show   = bpf_array_map_seq_show,
717 };
718 
719 static const struct bpf_iter_seq_info iter_seq_info = {
720         .seq_ops                = &bpf_array_map_seq_ops,
721         .init_seq_private       = bpf_iter_init_array_map,
722         .fini_seq_private       = bpf_iter_fini_array_map,
723         .seq_priv_size          = sizeof(struct bpf_iter_seq_array_map_info),
724 };
725 
726 static long bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_fn,
727                                     void *callback_ctx, u64 flags)
728 {
729         u32 i, key, num_elems = 0;
730         struct bpf_array *array;
731         bool is_percpu;
732         u64 ret = 0;
733         void *val;
734 
735         if (flags != 0)
736                 return -EINVAL;
737 
738         is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
739         array = container_of(map, struct bpf_array, map);
740         if (is_percpu)
741                 migrate_disable();
742         for (i = 0; i < map->max_entries; i++) {
743                 if (is_percpu)
744                         val = this_cpu_ptr(array->pptrs[i]);
745                 else
746                         val = array_map_elem_ptr(array, i);
747                 num_elems++;
748                 key = i;
749                 ret = callback_fn((u64)(long)map, (u64)(long)&key,
750                                   (u64)(long)val, (u64)(long)callback_ctx, 0);
751                 /* return value: 0 - continue, 1 - stop and return */
752                 if (ret)
753                         break;
754         }
755 
756         if (is_percpu)
757                 migrate_enable();
758         return num_elems;
759 }
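
On the BPF program side this iterator is reached through the bpf_for_each_map_elem() helper. A sketch of a callback walking an array map, assuming a typical libbpf build with vmlinux.h and bpf_helpers.h; the map, section and function names are illustrative:

/* BPF program side (sketch, compiled with clang -target bpf) */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 8);
	__type(key, __u32);
	__type(value, __u64);
} counters SEC(".maps");

static long sum_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *ctx)
{
	*(__u64 *)ctx += *val;
	return 0;               /* 0 = continue, 1 = stop early */
}

SEC("tracepoint/syscalls/sys_enter_getpid")
int sum_all(void *ctx)
{
	__u64 total = 0;

	/* walks every index of the array, calling sum_cb for each element */
	bpf_for_each_map_elem(&counters, sum_cb, &total, 0);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
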
760 
761 static u64 array_map_mem_usage(const struct bpf_map *map)
762 {
763         struct bpf_array *array = container_of(map, struct bpf_array, map);
764         bool percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
765         u32 elem_size = array->elem_size;
766         u64 entries = map->max_entries;
767         u64 usage = sizeof(*array);
768 
769         if (percpu) {
770                 usage += entries * sizeof(void *);
771                 usage += entries * elem_size * num_possible_cpus();
772         } else {
773                 if (map->map_flags & BPF_F_MMAPABLE) {
774                         usage = PAGE_ALIGN(usage);
775                         usage += PAGE_ALIGN(entries * elem_size);
776                 } else {
777                         usage += entries * elem_size;
778                 }
779         }
780         return usage;
781 }
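
The same formula can be evaluated offline. A standalone sketch; page_align() and the 48-byte base are stand-ins for PAGE_ALIGN() and sizeof(struct bpf_array), which vary by kernel and architecture:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SZ 4096ULL

static uint64_t page_align(uint64_t x)
{
	return (x + PAGE_SZ - 1) & ~(PAGE_SZ - 1);
}

/* mirrors array_map_mem_usage() for a plain (non-percpu) array map */
int main(void)
{
	uint64_t entries = 1 << 20, value_size = 12;
	uint64_t elem_size = (value_size + 7) & ~7ULL;
	uint64_t base = 48;             /* stand-in for sizeof(struct bpf_array) */

	printf("plain:    %llu bytes\n",
	       (unsigned long long)(base + entries * elem_size));
	printf("mmapable: %llu bytes\n",
	       (unsigned long long)(page_align(base) + page_align(entries * elem_size)));
	return 0;
}
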
782 
783 BTF_ID_LIST_SINGLE(array_map_btf_ids, struct, bpf_array)
784 const struct bpf_map_ops array_map_ops = {
785         .map_meta_equal = array_map_meta_equal,
786         .map_alloc_check = array_map_alloc_check,
787         .map_alloc = array_map_alloc,
788         .map_free = array_map_free,
789         .map_get_next_key = array_map_get_next_key,
790         .map_release_uref = array_map_free_timers_wq,
791         .map_lookup_elem = array_map_lookup_elem,
792         .map_update_elem = array_map_update_elem,
793         .map_delete_elem = array_map_delete_elem,
794         .map_gen_lookup = array_map_gen_lookup,
795         .map_direct_value_addr = array_map_direct_value_addr,
796         .map_direct_value_meta = array_map_direct_value_meta,
797         .map_mmap = array_map_mmap,
798         .map_seq_show_elem = array_map_seq_show_elem,
799         .map_check_btf = array_map_check_btf,
800         .map_lookup_batch = generic_map_lookup_batch,
801         .map_update_batch = generic_map_update_batch,
802         .map_set_for_each_callback_args = map_set_for_each_callback_args,
803         .map_for_each_callback = bpf_for_each_array_elem,
804         .map_mem_usage = array_map_mem_usage,
805         .map_btf_id = &array_map_btf_ids[0],
806         .iter_seq_info = &iter_seq_info,
807 };
808 
809 const struct bpf_map_ops percpu_array_map_ops = {
810         .map_meta_equal = bpf_map_meta_equal,
811         .map_alloc_check = array_map_alloc_check,
812         .map_alloc = array_map_alloc,
813         .map_free = array_map_free,
814         .map_get_next_key = array_map_get_next_key,
815         .map_lookup_elem = percpu_array_map_lookup_elem,
816         .map_gen_lookup = percpu_array_map_gen_lookup,
817         .map_update_elem = array_map_update_elem,
818         .map_delete_elem = array_map_delete_elem,
819         .map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem,
820         .map_seq_show_elem = percpu_array_map_seq_show_elem,
821         .map_check_btf = array_map_check_btf,
822         .map_lookup_batch = generic_map_lookup_batch,
823         .map_update_batch = generic_map_update_batch,
824         .map_set_for_each_callback_args = map_set_for_each_callback_args,
825         .map_for_each_callback = bpf_for_each_array_elem,
826         .map_mem_usage = array_map_mem_usage,
827         .map_btf_id = &array_map_btf_ids[0],
828         .iter_seq_info = &iter_seq_info,
829 };
830 
831 static int fd_array_map_alloc_check(union bpf_attr *attr)
832 {
833         /* only file descriptors can be stored in this type of map */
834         if (attr->value_size != sizeof(u32))
835                 return -EINVAL;
836         /* Program read-only/write-only not supported for special maps yet. */
837         if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG))
838                 return -EINVAL;
839         return array_map_alloc_check(attr);
840 }
841 
842 static void fd_array_map_free(struct bpf_map *map)
843 {
844         struct bpf_array *array = container_of(map, struct bpf_array, map);
845         int i;
846 
847         /* make sure it's empty */
848         for (i = 0; i < array->map.max_entries; i++)
849                 BUG_ON(array->ptrs[i] != NULL);
850 
851         bpf_map_area_free(array);
852 }
853 
854 static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
855 {
856         return ERR_PTR(-EOPNOTSUPP);
857 }
858 
859 /* only called from syscall */
860 int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
861 {
862         void **elem, *ptr;
863         int ret = 0;
864 
865         if (!map->ops->map_fd_sys_lookup_elem)
866                 return -ENOTSUPP;
867 
868         rcu_read_lock();
869         elem = array_map_lookup_elem(map, key);
870         if (elem && (ptr = READ_ONCE(*elem)))
871                 *value = map->ops->map_fd_sys_lookup_elem(ptr);
872         else
873                 ret = -ENOENT;
874         rcu_read_unlock();
875 
876         return ret;
877 }
878 
879 /* only called from syscall */
880 int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
881                                  void *key, void *value, u64 map_flags)
882 {
883         struct bpf_array *array = container_of(map, struct bpf_array, map);
884         void *new_ptr, *old_ptr;
885         u32 index = *(u32 *)key, ufd;
886 
887         if (map_flags != BPF_ANY)
888                 return -EINVAL;
889 
890         if (index >= array->map.max_entries)
891                 return -E2BIG;
892 
893         ufd = *(u32 *)value;
894         new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
895         if (IS_ERR(new_ptr))
896                 return PTR_ERR(new_ptr);
897 
898         if (map->ops->map_poke_run) {
899                 mutex_lock(&array->aux->poke_mutex);
900                 old_ptr = xchg(array->ptrs + index, new_ptr);
901                 map->ops->map_poke_run(map, index, old_ptr, new_ptr);
902                 mutex_unlock(&array->aux->poke_mutex);
903         } else {
904                 old_ptr = xchg(array->ptrs + index, new_ptr);
905         }
906 
907         if (old_ptr)
908                 map->ops->map_fd_put_ptr(map, old_ptr, true);
909         return 0;
910 }
911 
912 static long __fd_array_map_delete_elem(struct bpf_map *map, void *key, bool need_defer)
913 {
914         struct bpf_array *array = container_of(map, struct bpf_array, map);
915         void *old_ptr;
916         u32 index = *(u32 *)key;
917 
918         if (index >= array->map.max_entries)
919                 return -E2BIG;
920 
921         if (map->ops->map_poke_run) {
922                 mutex_lock(&array->aux->poke_mutex);
923                 old_ptr = xchg(array->ptrs + index, NULL);
924                 map->ops->map_poke_run(map, index, old_ptr, NULL);
925                 mutex_unlock(&array->aux->poke_mutex);
926         } else {
927                 old_ptr = xchg(array->ptrs + index, NULL);
928         }
929 
930         if (old_ptr) {
931                 map->ops->map_fd_put_ptr(map, old_ptr, need_defer);
932                 return 0;
933         } else {
934                 return -ENOENT;
935         }
936 }
937 
938 static long fd_array_map_delete_elem(struct bpf_map *map, void *key)
939 {
940         return __fd_array_map_delete_elem(map, key, true);
941 }
942 
943 static void *prog_fd_array_get_ptr(struct bpf_map *map,
944                                    struct file *map_file, int fd)
945 {
946         struct bpf_prog *prog = bpf_prog_get(fd);
947 
948         if (IS_ERR(prog))
949                 return prog;
950 
951         if (!bpf_prog_map_compatible(map, prog)) {
952                 bpf_prog_put(prog);
953                 return ERR_PTR(-EINVAL);
954         }
955 
956         return prog;
957 }
958 
959 static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
960 {
961         /* bpf_prog is freed after one RCU or tasks trace grace period */
962         bpf_prog_put(ptr);
963 }
964 
965 static u32 prog_fd_array_sys_lookup_elem(void *ptr)
966 {
967         return ((struct bpf_prog *)ptr)->aux->id;
968 }
969 
970 /* decrement refcnt of all bpf_progs that are stored in this map */
971 static void bpf_fd_array_map_clear(struct bpf_map *map, bool need_defer)
972 {
973         struct bpf_array *array = container_of(map, struct bpf_array, map);
974         int i;
975 
976         for (i = 0; i < array->map.max_entries; i++)
977                 __fd_array_map_delete_elem(map, &i, need_defer);
978 }
979 
980 static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
981                                          struct seq_file *m)
982 {
983         void **elem, *ptr;
984         u32 prog_id;
985 
986         rcu_read_lock();
987 
988         elem = array_map_lookup_elem(map, key);
989         if (elem) {
990                 ptr = READ_ONCE(*elem);
991                 if (ptr) {
992                         seq_printf(m, "%u: ", *(u32 *)key);
993                         prog_id = prog_fd_array_sys_lookup_elem(ptr);
994                         btf_type_seq_show(map->btf, map->btf_value_type_id,
995                                           &prog_id, m);
996                         seq_puts(m, "\n");
997                 }
998         }
999 
1000         rcu_read_unlock();
1001 }
1002 
1003 struct prog_poke_elem {
1004         struct list_head list;
1005         struct bpf_prog_aux *aux;
1006 };
1007 
1008 static int prog_array_map_poke_track(struct bpf_map *map,
1009                                      struct bpf_prog_aux *prog_aux)
1010 {
1011         struct prog_poke_elem *elem;
1012         struct bpf_array_aux *aux;
1013         int ret = 0;
1014 
1015         aux = container_of(map, struct bpf_array, map)->aux;
1016         mutex_lock(&aux->poke_mutex);
1017         list_for_each_entry(elem, &aux->poke_progs, list) {
1018                 if (elem->aux == prog_aux)
1019                         goto out;
1020         }
1021 
1022         elem = kmalloc(sizeof(*elem), GFP_KERNEL);
1023         if (!elem) {
1024                 ret = -ENOMEM;
1025                 goto out;
1026         }
1027 
1028         INIT_LIST_HEAD(&elem->list);
1029         /* We must track the program's aux info at this point in time
1030          * since the program pointer itself may not be stable yet, see
1031          * also comment in prog_array_map_poke_run().
1032          */
1033         elem->aux = prog_aux;
1034 
1035         list_add_tail(&elem->list, &aux->poke_progs);
1036 out:
1037         mutex_unlock(&aux->poke_mutex);
1038         return ret;
1039 }
1040 
1041 static void prog_array_map_poke_untrack(struct bpf_map *map,
1042                                         struct bpf_prog_aux *prog_aux)
1043 {
1044         struct prog_poke_elem *elem, *tmp;
1045         struct bpf_array_aux *aux;
1046 
1047         aux = container_of(map, struct bpf_array, map)->aux;
1048         mutex_lock(&aux->poke_mutex);
1049         list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
1050                 if (elem->aux == prog_aux) {
1051                         list_del_init(&elem->list);
1052                         kfree(elem);
1053                         break;
1054                 }
1055         }
1056         mutex_unlock(&aux->poke_mutex);
1057 }
1058 
1059 void __weak bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
1060                                       struct bpf_prog *new, struct bpf_prog *old)
1061 {
1062         WARN_ON_ONCE(1);
1063 }
1064 
1065 static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
1066                                     struct bpf_prog *old,
1067                                     struct bpf_prog *new)
1068 {
1069         struct prog_poke_elem *elem;
1070         struct bpf_array_aux *aux;
1071 
1072         aux = container_of(map, struct bpf_array, map)->aux;
1073         WARN_ON_ONCE(!mutex_is_locked(&aux->poke_mutex));
1074 
1075         list_for_each_entry(elem, &aux->poke_progs, list) {
1076                 struct bpf_jit_poke_descriptor *poke;
1077                 int i;
1078 
1079                 for (i = 0; i < elem->aux->size_poke_tab; i++) {
1080                         poke = &elem->aux->poke_tab[i];
1081 
1082                         /* Few things to be aware of:
1083                          *
1084                          * 1) We can only ever access aux in this context, but
1085                          *    not aux->prog since it might not be stable yet and
1086                          *    there could be danger of use after free otherwise.
1087                          * 2) Initially when we start tracking aux, the program
1088                          *    is not JITed yet and also does not have a kallsyms
1089                          *    entry. We skip these as poke->tailcall_target_stable
1090                          *    is not active yet. The JIT will do the final fixup
1091                          *    before setting it stable. The various
1092                          *    poke->tailcall_target_stable are successively
1093                          *    activated, so tail call updates can arrive from here
1094                          *    while JIT is still finishing its final fixup for
1095                          *    non-activated poke entries.
1096                          * 3) Also, programs reaching a refcount of zero while
1097                          *    patching is in progress are okay since we're protected
1098                          *    under poke_mutex and untrack the programs before the
1099                          *    JIT buffer is freed.
1100                          */
1101                         if (!READ_ONCE(poke->tailcall_target_stable))
1102                                 continue;
1103                         if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
1104                                 continue;
1105                         if (poke->tail_call.map != map ||
1106                             poke->tail_call.key != key)
1107                                 continue;
1108 
1109                         bpf_arch_poke_desc_update(poke, new, old);
1110                 }
1111         }
1112 }
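
The poke descriptors patched here back the bpf_tail_call() helper: when user space updates a prog array slot, the JITed direct jumps are rewritten to point at the new program. A BPF-side sketch of the consumer, assuming a vmlinux.h/bpf_helpers.h build; the map is normally populated from user space with program fds via bpf_map_update_elem():

/* BPF program side (sketch) */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, 4);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");

SEC("xdp")
int handler(struct xdp_md *ctx)
{
	/* jumps to the program stored at index 0, if any; on success it
	 * never returns here, on a missing entry it falls through.
	 */
	bpf_tail_call(ctx, &jmp_table, 0);
	return XDP_PASS;
}

char LICENSE[] SEC("license") = "GPL";
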
1113 
1114 static void prog_array_map_clear_deferred(struct work_struct *work)
1115 {
1116         struct bpf_map *map = container_of(work, struct bpf_array_aux,
1117                                            work)->map;
1118         bpf_fd_array_map_clear(map, true);
1119         bpf_map_put(map);
1120 }
1121 
1122 static void prog_array_map_clear(struct bpf_map *map)
1123 {
1124         struct bpf_array_aux *aux = container_of(map, struct bpf_array,
1125                                                  map)->aux;
1126         bpf_map_inc(map);
1127         schedule_work(&aux->work);
1128 }
1129 
1130 static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
1131 {
1132         struct bpf_array_aux *aux;
1133         struct bpf_map *map;
1134 
1135         aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT);
1136         if (!aux)
1137                 return ERR_PTR(-ENOMEM);
1138 
1139         INIT_WORK(&aux->work, prog_array_map_clear_deferred);
1140         INIT_LIST_HEAD(&aux->poke_progs);
1141         mutex_init(&aux->poke_mutex);
1142 
1143         map = array_map_alloc(attr);
1144         if (IS_ERR(map)) {
1145                 kfree(aux);
1146                 return map;
1147         }
1148 
1149         container_of(map, struct bpf_array, map)->aux = aux;
1150         aux->map = map;
1151 
1152         return map;
1153 }
1154 
1155 static void prog_array_map_free(struct bpf_map *map)
1156 {
1157         struct prog_poke_elem *elem, *tmp;
1158         struct bpf_array_aux *aux;
1159 
1160         aux = container_of(map, struct bpf_array, map)->aux;
1161         list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
1162                 list_del_init(&elem->list);
1163                 kfree(elem);
1164         }
1165         kfree(aux);
1166         fd_array_map_free(map);
1167 }
1168 
1169 /* prog_array->aux->{type,jited} is a runtime binding.
1170  * Doing static check alone in the verifier is not enough.
1171  * Thus, prog_array_map cannot be used as an inner_map
1172  * and map_meta_equal is not implemented.
1173  */
1174 const struct bpf_map_ops prog_array_map_ops = {
1175         .map_alloc_check = fd_array_map_alloc_check,
1176         .map_alloc = prog_array_map_alloc,
1177         .map_free = prog_array_map_free,
1178         .map_poke_track = prog_array_map_poke_track,
1179         .map_poke_untrack = prog_array_map_poke_untrack,
1180         .map_poke_run = prog_array_map_poke_run,
1181         .map_get_next_key = array_map_get_next_key,
1182         .map_lookup_elem = fd_array_map_lookup_elem,
1183         .map_delete_elem = fd_array_map_delete_elem,
1184         .map_fd_get_ptr = prog_fd_array_get_ptr,
1185         .map_fd_put_ptr = prog_fd_array_put_ptr,
1186         .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
1187         .map_release_uref = prog_array_map_clear,
1188         .map_seq_show_elem = prog_array_map_seq_show_elem,
1189         .map_mem_usage = array_map_mem_usage,
1190         .map_btf_id = &array_map_btf_ids[0],
1191 };
1192 
1193 static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
1194                                                    struct file *map_file)
1195 {
1196         struct bpf_event_entry *ee;
1197 
1198         ee = kzalloc(sizeof(*ee), GFP_KERNEL);
1199         if (ee) {
1200                 ee->event = perf_file->private_data;
1201                 ee->perf_file = perf_file;
1202                 ee->map_file = map_file;
1203         }
1204 
1205         return ee;
1206 }
1207 
1208 static void __bpf_event_entry_free(struct rcu_head *rcu)
1209 {
1210         struct bpf_event_entry *ee;
1211 
1212         ee = container_of(rcu, struct bpf_event_entry, rcu);
1213         fput(ee->perf_file);
1214         kfree(ee);
1215 }
1216 
1217 static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
1218 {
1219         call_rcu(&ee->rcu, __bpf_event_entry_free);
1220 }
1221 
1222 static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
1223                                          struct file *map_file, int fd)
1224 {
1225         struct bpf_event_entry *ee;
1226         struct perf_event *event;
1227         struct file *perf_file;
1228         u64 value;
1229 
1230         perf_file = perf_event_get(fd);
1231         if (IS_ERR(perf_file))
1232                 return perf_file;
1233 
1234         ee = ERR_PTR(-EOPNOTSUPP);
1235         event = perf_file->private_data;
1236         if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
1237                 goto err_out;
1238 
1239         ee = bpf_event_entry_gen(perf_file, map_file);
1240         if (ee)
1241                 return ee;
1242         ee = ERR_PTR(-ENOMEM);
1243 err_out:
1244         fput(perf_file);
1245         return ee;
1246 }
1247 
1248 static void perf_event_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
1249 {
1250         /* bpf_perf_event is freed after one RCU grace period */
1251         bpf_event_entry_free_rcu(ptr);
1252 }
1253 
1254 static void perf_event_fd_array_release(struct bpf_map *map,
1255                                         struct file *map_file)
1256 {
1257         struct bpf_array *array = container_of(map, struct bpf_array, map);
1258         struct bpf_event_entry *ee;
1259         int i;
1260 
1261         if (map->map_flags & BPF_F_PRESERVE_ELEMS)
1262                 return;
1263 
1264         rcu_read_lock();
1265         for (i = 0; i < array->map.max_entries; i++) {
1266                 ee = READ_ONCE(array->ptrs[i]);
1267                 if (ee && ee->map_file == map_file)
1268                         __fd_array_map_delete_elem(map, &i, true);
1269         }
1270         rcu_read_unlock();
1271 }
1272 
1273 static void perf_event_fd_array_map_free(struct bpf_map *map)
1274 {
1275         if (map->map_flags & BPF_F_PRESERVE_ELEMS)
1276                 bpf_fd_array_map_clear(map, false);
1277         fd_array_map_free(map);
1278 }
1279 
1280 const struct bpf_map_ops perf_event_array_map_ops = {
1281         .map_meta_equal = bpf_map_meta_equal,
1282         .map_alloc_check = fd_array_map_alloc_check,
1283         .map_alloc = array_map_alloc,
1284         .map_free = perf_event_fd_array_map_free,
1285         .map_get_next_key = array_map_get_next_key,
1286         .map_lookup_elem = fd_array_map_lookup_elem,
1287         .map_delete_elem = fd_array_map_delete_elem,
1288         .map_fd_get_ptr = perf_event_fd_array_get_ptr,
1289         .map_fd_put_ptr = perf_event_fd_array_put_ptr,
1290         .map_release = perf_event_fd_array_release,
1291         .map_check_btf = map_check_no_btf,
1292         .map_mem_usage = array_map_mem_usage,
1293         .map_btf_id = &array_map_btf_ids[0],
1294 };
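
A BPF_MAP_TYPE_PERF_EVENT_ARRAY holds one perf event file per slot (typically one per CPU, opened by the loader, e.g. libbpf's perf_buffer__new()) and is what bpf_perf_event_output() writes through. A BPF-side sketch, assuming a vmlinux.h/bpf_helpers.h build and that the loader sizes max_entries to the number of CPUs, as libbpf usually does for this map type:

/* BPF program side (sketch) */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} events SEC(".maps");

struct event {
	__u32 pid;
};

SEC("tracepoint/syscalls/sys_enter_execve")
int trace_exec(void *ctx)
{
	struct event e = { .pid = bpf_get_current_pid_tgid() >> 32 };

	/* writes into the perf ring attached to the current CPU's map slot */
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &e, sizeof(e));
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
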
1295 
1296 #ifdef CONFIG_CGROUPS
1297 static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
1298                                      struct file *map_file /* not used */,
1299                                      int fd)
1300 {
1301         return cgroup_get_from_fd(fd);
1302 }
1303 
1304 static void cgroup_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
1305 {
1306         /* cgroup_put() frees the cgrp after an RCU grace period */
1307         cgroup_put(ptr);
1308 }
1309 
1310 static void cgroup_fd_array_free(struct bpf_map *map)
1311 {
1312         bpf_fd_array_map_clear(map, false);
1313         fd_array_map_free(map);
1314 }
1315 
1316 const struct bpf_map_ops cgroup_array_map_ops = {
1317         .map_meta_equal = bpf_map_meta_equal,
1318         .map_alloc_check = fd_array_map_alloc_check,
1319         .map_alloc = array_map_alloc,
1320         .map_free = cgroup_fd_array_free,
1321         .map_get_next_key = array_map_get_next_key,
1322         .map_lookup_elem = fd_array_map_lookup_elem,
1323         .map_delete_elem = fd_array_map_delete_elem,
1324         .map_fd_get_ptr = cgroup_fd_array_get_ptr,
1325         .map_fd_put_ptr = cgroup_fd_array_put_ptr,
1326         .map_check_btf = map_check_no_btf,
1327         .map_mem_usage = array_map_mem_usage,
1328         .map_btf_id = &array_map_btf_ids[0],
1329 };
1330 #endif
1331 
1332 static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
1333 {
1334         struct bpf_map *map, *inner_map_meta;
1335 
1336         inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
1337         if (IS_ERR(inner_map_meta))
1338                 return inner_map_meta;
1339 
1340         map = array_map_alloc(attr);
1341         if (IS_ERR(map)) {
1342                 bpf_map_meta_free(inner_map_meta);
1343                 return map;
1344         }
1345 
1346         map->inner_map_meta = inner_map_meta;
1347 
1348         return map;
1349 }
1350 
1351 static void array_of_map_free(struct bpf_map *map)
1352 {
1353         /* map->inner_map_meta is only accessed by syscall which
1354          * is protected by fdget/fdput.
1355          */
1356         bpf_map_meta_free(map->inner_map_meta);
1357         bpf_fd_array_map_clear(map, false);
1358         fd_array_map_free(map);
1359 }
1360 
1361 static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
1362 {
1363         struct bpf_map **inner_map = array_map_lookup_elem(map, key);
1364 
1365         if (!inner_map)
1366                 return NULL;
1367 
1368         return READ_ONCE(*inner_map);
1369 }
1370 
1371 static int array_of_map_gen_lookup(struct bpf_map *map,
1372                                    struct bpf_insn *insn_buf)
1373 {
1374         struct bpf_array *array = container_of(map, struct bpf_array, map);
1375         u32 elem_size = array->elem_size;
1376         struct bpf_insn *insn = insn_buf;
1377         const int ret = BPF_REG_0;
1378         const int map_ptr = BPF_REG_1;
1379         const int index = BPF_REG_2;
1380 
1381         *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
1382         *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
1383         if (!map->bypass_spec_v1) {
1384                 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
1385                 *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
1386         } else {
1387                 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
1388         }
1389         if (is_power_of_2(elem_size))
1390                 *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
1391         else
1392                 *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
1393         *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
1394         *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
1395         *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
1396         *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1397         *insn++ = BPF_MOV64_IMM(ret, 0);
1398 
1399         return insn - insn_buf;
1400 }
1401 
1402 const struct bpf_map_ops array_of_maps_map_ops = {
1403         .map_alloc_check = fd_array_map_alloc_check,
1404         .map_alloc = array_of_map_alloc,
1405         .map_free = array_of_map_free,
1406         .map_get_next_key = array_map_get_next_key,
1407         .map_lookup_elem = array_of_map_lookup_elem,
1408         .map_delete_elem = fd_array_map_delete_elem,
1409         .map_fd_get_ptr = bpf_map_fd_get_ptr,
1410         .map_fd_put_ptr = bpf_map_fd_put_ptr,
1411         .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
1412         .map_gen_lookup = array_of_map_gen_lookup,
1413         .map_lookup_batch = generic_map_lookup_batch,
1414         .map_update_batch = generic_map_update_batch,
1415         .map_check_btf = map_check_no_btf,
1416         .map_mem_usage = array_map_mem_usage,
1417         .map_btf_id = &array_map_btf_ids[0],
1418 };
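
For completeness, an array-of-maps is usually declared on the BPF side with libbpf's __array(values, ...) convention and dereferenced with two lookups, the first returning the inner map. A sketch assuming a vmlinux.h/bpf_helpers.h build; all names are illustrative:

/* BPF program side (sketch) */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

struct inner_map {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
} inner SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
	__uint(max_entries, 4);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
	__array(values, struct inner_map);
} outer SEC(".maps") = {
	.values = { [0] = &inner },             /* slot 0 pre-populated at load time */
};

SEC("tracepoint/syscalls/sys_enter_getpid")
int use_inner(void *ctx)
{
	__u32 zero = 0;
	void *m = bpf_map_lookup_elem(&outer, &zero);   /* returns the inner map */
	__u64 *val;

	if (!m)
		return 0;
	val = bpf_map_lookup_elem(m, &zero);            /* second-level lookup */
	if (val)
		*val += 1;
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
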
1419 
