// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/rculist_nulls.h>
#include <linux/rcupdate_wait.h>
#include <linux/random.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/btf_ids.h>
#include "percpu_freelist.h"
#include "bpf_lru_list.h"
#include "map_in_map.h"
#include <linux/bpf_mem_alloc.h>

#define HTAB_CREATE_FLAG_MASK						\
	(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE |	\
	 BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED)

#define BATCH_OPS(_name)			\
	.map_lookup_batch =			\
	_name##_map_lookup_batch,		\
	.map_lookup_and_delete_batch =		\
	_name##_map_lookup_and_delete_batch,	\
	.map_update_batch =			\
	generic_map_update_batch,		\
	.map_delete_batch =			\
	generic_map_delete_batch

/*
 * The bucket lock has two protection scopes:
 *
 * 1) Serializing concurrent operations from BPF programs on different
 *    CPUs
 *
 * 2) Serializing concurrent operations from BPF programs and sys_bpf()
 *
 * BPF programs can execute in any context including perf, kprobes and
 * tracing. As there are almost no limits on where perf, kprobes and
 * tracing can be invoked from, the lock operations need to be protected
 * against deadlocks. Deadlocks can be caused by recursion and by an
 * invocation in the lock held section when functions which acquire this
 * lock are invoked from sys_bpf(). BPF recursion is prevented by
 * incrementing the per CPU variable bpf_prog_active, which prevents BPF
 * programs attached to perf events, kprobes and tracing from being
 * invoked before the prior invocation from one of these contexts has
 * completed. sys_bpf() uses the same mechanism by pinning the task to
 * the current CPU and incrementing the recursion protection across the
 * map operation.
 *
 * This has subtle implications on PREEMPT_RT. PREEMPT_RT forbids certain
 * operations like memory allocations (even with GFP_ATOMIC) from atomic
 * contexts. This is required because even with GFP_ATOMIC the memory
 * allocator calls into code paths which acquire locks with long held lock
 * sections. To ensure deterministic behaviour, these locks are regular
 * spinlocks, which are converted to 'sleepable' spinlocks on RT. The only
 * true atomic contexts on an RT kernel are the low level hardware
 * handling, scheduling, low level interrupt handling, NMIs etc. None of
 * these contexts should ever do memory allocations.
 *
 * As regular device interrupt handlers and soft interrupts are forced into
 * thread context, the existing code which does
 *   spin_lock*(); alloc(GFP_ATOMIC); spin_unlock*();
 * just works.
 *
 * In theory the BPF locks could be converted to regular spinlocks as well,
 * but the bucket locks and percpu_freelist locks can be taken from
 * arbitrary contexts (perf, kprobes, tracepoints) which are required to be
 * atomic contexts even on RT. Before the introduction of bpf_mem_alloc,
 * it was only safe to use a raw spinlock for preallocated hash maps on an
 * RT kernel, because there is no memory allocation within the lock held
 * sections. However, after the hash map was fully converted to use
 * bpf_mem_alloc, memory allocation for non-preallocated hash maps is
 * non-synchronous as well, so it is safe to always use a raw spinlock for
 * the bucket lock.
 */
struct bucket {
	struct hlist_nulls_head head;
	raw_spinlock_t raw_lock;
};

#define HASHTAB_MAP_LOCK_COUNT 8
#define HASHTAB_MAP_LOCK_MASK (HASHTAB_MAP_LOCK_COUNT - 1)

struct bpf_htab {
	struct bpf_map map;
	struct bpf_mem_alloc ma;
	struct bpf_mem_alloc pcpu_ma;
	struct bucket *buckets;
	void *elems;
	union {
		struct pcpu_freelist freelist;
		struct bpf_lru lru;
	};
	struct htab_elem *__percpu *extra_elems;
	/* The number of elements in a non-preallocated hashtable is kept
	 * in either pcount or count.
	 */
	struct percpu_counter pcount;
	atomic_t count;
	bool use_percpu_counter;
	u32 n_buckets;	/* number of hash buckets */
	u32 elem_size;	/* size of each element in bytes */
	u32 hashrnd;
	struct lock_class_key lockdep_key;
	int __percpu *map_locked[HASHTAB_MAP_LOCK_COUNT];
};

/* each htab element is struct htab_elem + key + value */
struct htab_elem {
	union {
		struct hlist_nulls_node hash_node;
		struct {
			void *padding;
			union {
				struct pcpu_freelist_node fnode;
				struct htab_elem *batch_flink;
			};
		};
	};
	union {
		/* pointer to per-cpu pointer */
		void *ptr_to_pptr;
		struct bpf_lru_node lru_node;
	};
	u32 hash;
	char key[] __aligned(8);
};

static inline bool htab_is_prealloc(const struct bpf_htab *htab)
{
	return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
}

static void htab_init_buckets(struct bpf_htab *htab)
{
	unsigned int i;

	for (i = 0; i < htab->n_buckets; i++) {
		INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
		raw_spin_lock_init(&htab->buckets[i].raw_lock);
		lockdep_set_class(&htab->buckets[i].raw_lock,
				  &htab->lockdep_key);
		cond_resched();
	}
}

static inline int htab_lock_bucket(const struct bpf_htab *htab,
				   struct bucket *b, u32 hash,
				   unsigned long *pflags)
{
	unsigned long flags;

	hash = hash & min_t(u32, HASHTAB_MAP_LOCK_MASK, htab->n_buckets - 1);

	preempt_disable();
	local_irq_save(flags);
	if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) {
		__this_cpu_dec(*(htab->map_locked[hash]));
		local_irq_restore(flags);
		preempt_enable();
		return -EBUSY;
	}

	raw_spin_lock(&b->raw_lock);
	*pflags = flags;

	return 0;
}

static inline void htab_unlock_bucket(const struct bpf_htab *htab,
				      struct bucket *b, u32 hash,
				      unsigned long flags)
{
	hash = hash & min_t(u32, HASHTAB_MAP_LOCK_MASK, htab->n_buckets - 1);
	raw_spin_unlock(&b->raw_lock);
	__this_cpu_dec(*(htab->map_locked[hash]));
	local_irq_restore(flags);
	preempt_enable();
}

static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);

static bool htab_is_lru(const struct bpf_htab *htab)
{
	return htab->map.map_type == BPF_MAP_TYPE_LRU_HASH ||
		htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

static bool htab_is_percpu(const struct bpf_htab *htab)
{
	return htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
				     void __percpu *pptr)
{
	*(void __percpu **)(l->key +
			    key_size) = pptr;
}

static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
{
	return *(void __percpu **)(l->key + key_size);
}

static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l)
{
	return *(void **)(l->key + roundup(map->key_size, 8));
}

static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
{
	return (struct htab_elem *) (htab->elems + i * (u64)htab->elem_size);
}

static bool htab_has_extra_elems(struct bpf_htab *htab)
{
	return !htab_is_percpu(htab) && !htab_is_lru(htab);
}

static void htab_free_prealloced_timers_and_wq(struct bpf_htab *htab)
{
	u32 num_entries = htab->map.max_entries;
	int i;

	if (htab_has_extra_elems(htab))
		num_entries += num_possible_cpus();

	for (i = 0; i < num_entries; i++) {
		struct htab_elem *elem;

		elem = get_htab_elem(htab, i);
		if (btf_record_has_field(htab->map.record, BPF_TIMER))
			bpf_obj_free_timer(htab->map.record,
					   elem->key + round_up(htab->map.key_size, 8));
		if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
			bpf_obj_free_workqueue(htab->map.record,
					       elem->key + round_up(htab->map.key_size, 8));
		cond_resched();
	}
}

static void htab_free_prealloced_fields(struct bpf_htab *htab)
{
	u32 num_entries = htab->map.max_entries;
	int i;

	if (IS_ERR_OR_NULL(htab->map.record))
		return;
	if (htab_has_extra_elems(htab))
		num_entries += num_possible_cpus();
	for (i = 0; i < num_entries; i++) {
		struct htab_elem *elem;

		elem = get_htab_elem(htab, i);
		if (htab_is_percpu(htab)) {
			void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size);
			int cpu;

			for_each_possible_cpu(cpu) {
				bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu));
				cond_resched();
			}
		} else {
			bpf_obj_free_fields(htab->map.record, elem->key + round_up(htab->map.key_size, 8));
			cond_resched();
		}
		cond_resched();
	}
}

static void htab_free_elems(struct bpf_htab *htab)
{
	int i;

	if (!htab_is_percpu(htab))
		goto free_elems;

	for (i = 0; i < htab->map.max_entries; i++) {
		void __percpu *pptr;

		pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
					 htab->map.key_size);
		free_percpu(pptr);
		cond_resched();
	}
free_elems:
	bpf_map_area_free(htab->elems);
}

/* The LRU list has a lock (lru_lock). Each htab bucket has a lock
 * (bucket_lock). If both locks need to be acquired together, the lock
 * order is always lru_lock -> bucket_lock and this only happens in
 * bpf_lru_list.c logic. For example, certain code path of
 * bpf_lru_pop_free(), which is called by function prealloc_lru_pop(),
 * will acquire lru_lock first followed by acquiring bucket_lock.
 *
 * In hashtab.c, to avoid deadlock, lock acquisition of
 * bucket_lock followed by lru_lock is not allowed. In such cases,
 * bucket_lock needs to be released first before acquiring lru_lock.
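 *
 * A minimal sketch of both directions (illustrative only; the call chains
 * below are simplified from the functions in this file):
 *
 *   allowed, lru_lock -> bucket_lock:
 *     bpf_lru_pop_free()                          // may take lru_lock
 *       htab_lru_map_delete_node()
 *         htab_lock_bucket()                      // takes bucket_lock
 *
 *   avoided, bucket_lock -> lru_lock:
 *     htab_lru_map_delete_elem()
 *       htab_lock_bucket() ... htab_unlock_bucket()  // drop bucket_lock
 *       htab_lru_push_free()                      // only now is lru_lock taken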
304 */ 305 static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key, 306 u32 hash) 307 { 308 struct bpf_lru_node *node = bpf_lru_pop_free(&htab->lru, hash); 309 struct htab_elem *l; 310 311 if (node) { 312 bpf_map_inc_elem_count(&htab->map); 313 l = container_of(node, struct htab_elem, lru_node); 314 memcpy(l->key, key, htab->map.key_size); 315 return l; 316 } 317 318 return NULL; 319 } 320 321 static int prealloc_init(struct bpf_htab *htab) 322 { 323 u32 num_entries = htab->map.max_entries; 324 int err = -ENOMEM, i; 325 326 if (htab_has_extra_elems(htab)) 327 num_entries += num_possible_cpus(); 328 329 htab->elems = bpf_map_area_alloc((u64)htab->elem_size * num_entries, 330 htab->map.numa_node); 331 if (!htab->elems) 332 return -ENOMEM; 333 334 if (!htab_is_percpu(htab)) 335 goto skip_percpu_elems; 336 337 for (i = 0; i < num_entries; i++) { 338 u32 size = round_up(htab->map.value_size, 8); 339 void __percpu *pptr; 340 341 pptr = bpf_map_alloc_percpu(&htab->map, size, 8, 342 GFP_USER | __GFP_NOWARN); 343 if (!pptr) 344 goto free_elems; 345 htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size, 346 pptr); 347 cond_resched(); 348 } 349 350 skip_percpu_elems: 351 if (htab_is_lru(htab)) 352 err = bpf_lru_init(&htab->lru, 353 htab->map.map_flags & BPF_F_NO_COMMON_LRU, 354 offsetof(struct htab_elem, hash) - 355 offsetof(struct htab_elem, lru_node), 356 htab_lru_map_delete_node, 357 htab); 358 else 359 err = pcpu_freelist_init(&htab->freelist); 360 361 if (err) 362 goto free_elems; 363 364 if (htab_is_lru(htab)) 365 bpf_lru_populate(&htab->lru, htab->elems, 366 offsetof(struct htab_elem, lru_node), 367 htab->elem_size, num_entries); 368 else 369 pcpu_freelist_populate(&htab->freelist, 370 htab->elems + offsetof(struct htab_elem, fnode), 371 htab->elem_size, num_entries); 372 373 return 0; 374 375 free_elems: 376 htab_free_elems(htab); 377 return err; 378 } 379 380 static void prealloc_destroy(struct bpf_htab *htab) 381 { 382 htab_free_elems(htab); 383 384 if (htab_is_lru(htab)) 385 bpf_lru_destroy(&htab->lru); 386 else 387 pcpu_freelist_destroy(&htab->freelist); 388 } 389 390 static int alloc_extra_elems(struct bpf_htab *htab) 391 { 392 struct htab_elem *__percpu *pptr, *l_new; 393 struct pcpu_freelist_node *l; 394 int cpu; 395 396 pptr = bpf_map_alloc_percpu(&htab->map, sizeof(struct htab_elem *), 8, 397 GFP_USER | __GFP_NOWARN); 398 if (!pptr) 399 return -ENOMEM; 400 401 for_each_possible_cpu(cpu) { 402 l = pcpu_freelist_pop(&htab->freelist); 403 /* pop will succeed, since prealloc_init() 404 * preallocated extra num_possible_cpus elements 405 */ 406 l_new = container_of(l, struct htab_elem, fnode); 407 *per_cpu_ptr(pptr, cpu) = l_new; 408 } 409 htab->extra_elems = pptr; 410 return 0; 411 } 412 413 /* Called from syscall */ 414 static int htab_map_alloc_check(union bpf_attr *attr) 415 { 416 bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH || 417 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); 418 bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH || 419 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); 420 /* percpu_lru means each cpu has its own LRU list. 421 * it is different from BPF_MAP_TYPE_PERCPU_HASH where 422 * the map's value itself is percpu. percpu_lru has 423 * nothing to do with the map's value. 
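	 *
	 * Userspace sketch (illustrative; assumes libbpf's bpf_map_create()
	 * and bpf_map_create_opts, which are not part of this file):
	 *
	 *   LIBBPF_OPTS(bpf_map_create_opts, opts,
	 *               .map_flags = BPF_F_NO_COMMON_LRU);
	 *   int fd = bpf_map_create(BPF_MAP_TYPE_LRU_HASH, "percpu_lru_map",
	 *                           sizeof(__u32), sizeof(__u64),
	 *                           10000, &opts);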
424 */ 425 bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU); 426 bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC); 427 bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED); 428 int numa_node = bpf_map_attr_numa_node(attr); 429 430 BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) != 431 offsetof(struct htab_elem, hash_node.pprev)); 432 433 if (zero_seed && !capable(CAP_SYS_ADMIN)) 434 /* Guard against local DoS, and discourage production use. */ 435 return -EPERM; 436 437 if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK || 438 !bpf_map_flags_access_ok(attr->map_flags)) 439 return -EINVAL; 440 441 if (!lru && percpu_lru) 442 return -EINVAL; 443 444 if (lru && !prealloc) 445 return -ENOTSUPP; 446 447 if (numa_node != NUMA_NO_NODE && (percpu || percpu_lru)) 448 return -EINVAL; 449 450 /* check sanity of attributes. 451 * value_size == 0 may be allowed in the future to use map as a set 452 */ 453 if (attr->max_entries == 0 || attr->key_size == 0 || 454 attr->value_size == 0) 455 return -EINVAL; 456 457 if ((u64)attr->key_size + attr->value_size >= KMALLOC_MAX_SIZE - 458 sizeof(struct htab_elem)) 459 /* if key_size + value_size is bigger, the user space won't be 460 * able to access the elements via bpf syscall. This check 461 * also makes sure that the elem_size doesn't overflow and it's 462 * kmalloc-able later in htab_map_update_elem() 463 */ 464 return -E2BIG; 465 /* percpu map value size is bound by PCPU_MIN_UNIT_SIZE */ 466 if (percpu && round_up(attr->value_size, 8) > PCPU_MIN_UNIT_SIZE) 467 return -E2BIG; 468 469 return 0; 470 } 471 472 static struct bpf_map *htab_map_alloc(union bpf_attr *attr) 473 { 474 bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH || 475 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); 476 bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH || 477 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); 478 /* percpu_lru means each cpu has its own LRU list. 479 * it is different from BPF_MAP_TYPE_PERCPU_HASH where 480 * the map's value itself is percpu. percpu_lru has 481 * nothing to do with the map's value. 482 */ 483 bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU); 484 bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC); 485 struct bpf_htab *htab; 486 int err, i; 487 488 htab = bpf_map_area_alloc(sizeof(*htab), NUMA_NO_NODE); 489 if (!htab) 490 return ERR_PTR(-ENOMEM); 491 492 lockdep_register_key(&htab->lockdep_key); 493 494 bpf_map_init_from_attr(&htab->map, attr); 495 496 if (percpu_lru) { 497 /* ensure each CPU's lru list has >=1 elements. 498 * since we are at it, make each lru list has the same 499 * number of elements. 
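		 *
		 * Worked example (illustrative): with 4 possible CPUs and
		 * attr->max_entries = 10, roundup(10, 4) = 12, i.e. 3
		 * elements on each CPU's LRU list. The rounddown() fallback
		 * below is only taken if roundup() overflows u32.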
500 */ 501 htab->map.max_entries = roundup(attr->max_entries, 502 num_possible_cpus()); 503 if (htab->map.max_entries < attr->max_entries) 504 htab->map.max_entries = rounddown(attr->max_entries, 505 num_possible_cpus()); 506 } 507 508 /* hash table size must be power of 2; roundup_pow_of_two() can overflow 509 * into UB on 32-bit arches, so check that first 510 */ 511 err = -E2BIG; 512 if (htab->map.max_entries > 1UL << 31) 513 goto free_htab; 514 515 htab->n_buckets = roundup_pow_of_two(htab->map.max_entries); 516 517 htab->elem_size = sizeof(struct htab_elem) + 518 round_up(htab->map.key_size, 8); 519 if (percpu) 520 htab->elem_size += sizeof(void *); 521 else 522 htab->elem_size += round_up(htab->map.value_size, 8); 523 524 /* check for u32 overflow */ 525 if (htab->n_buckets > U32_MAX / sizeof(struct bucket)) 526 goto free_htab; 527 528 err = bpf_map_init_elem_count(&htab->map); 529 if (err) 530 goto free_htab; 531 532 err = -ENOMEM; 533 htab->buckets = bpf_map_area_alloc(htab->n_buckets * 534 sizeof(struct bucket), 535 htab->map.numa_node); 536 if (!htab->buckets) 537 goto free_elem_count; 538 539 for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) { 540 htab->map_locked[i] = bpf_map_alloc_percpu(&htab->map, 541 sizeof(int), 542 sizeof(int), 543 GFP_USER); 544 if (!htab->map_locked[i]) 545 goto free_map_locked; 546 } 547 548 if (htab->map.map_flags & BPF_F_ZERO_SEED) 549 htab->hashrnd = 0; 550 else 551 htab->hashrnd = get_random_u32(); 552 553 htab_init_buckets(htab); 554 555 /* compute_batch_value() computes batch value as num_online_cpus() * 2 556 * and __percpu_counter_compare() needs 557 * htab->max_entries - cur_number_of_elems to be more than batch * num_online_cpus() 558 * for percpu_counter to be faster than atomic_t. In practice the average bpf 559 * hash map size is 10k, which means that a system with 64 cpus will fill 560 * hashmap to 20% of 10k before percpu_counter becomes ineffective. Therefore 561 * define our own batch count as 32 then 10k hash map can be filled up to 80%: 562 * 10k - 8k > 32 _batch_ * 64 _cpus_ 563 * and __percpu_counter_compare() will still be fast. At that point hash map 564 * collisions will dominate its performance anyway. Assume that hash map filled 565 * to 50+% isn't going to be O(1) and use the following formula to choose 566 * between percpu_counter and atomic_t. 567 */ 568 #define PERCPU_COUNTER_BATCH 32 569 if (attr->max_entries / 2 > num_online_cpus() * PERCPU_COUNTER_BATCH) 570 htab->use_percpu_counter = true; 571 572 if (htab->use_percpu_counter) { 573 err = percpu_counter_init(&htab->pcount, 0, GFP_KERNEL); 574 if (err) 575 goto free_map_locked; 576 } 577 578 if (prealloc) { 579 err = prealloc_init(htab); 580 if (err) 581 goto free_map_locked; 582 583 if (!percpu && !lru) { 584 /* lru itself can remove the least used element, so 585 * there is no need for an extra elem during map_update. 
586 */ 587 err = alloc_extra_elems(htab); 588 if (err) 589 goto free_prealloc; 590 } 591 } else { 592 err = bpf_mem_alloc_init(&htab->ma, htab->elem_size, false); 593 if (err) 594 goto free_map_locked; 595 if (percpu) { 596 err = bpf_mem_alloc_init(&htab->pcpu_ma, 597 round_up(htab->map.value_size, 8), true); 598 if (err) 599 goto free_map_locked; 600 } 601 } 602 603 return &htab->map; 604 605 free_prealloc: 606 prealloc_destroy(htab); 607 free_map_locked: 608 if (htab->use_percpu_counter) 609 percpu_counter_destroy(&htab->pcount); 610 for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) 611 free_percpu(htab->map_locked[i]); 612 bpf_map_area_free(htab->buckets); 613 bpf_mem_alloc_destroy(&htab->pcpu_ma); 614 bpf_mem_alloc_destroy(&htab->ma); 615 free_elem_count: 616 bpf_map_free_elem_count(&htab->map); 617 free_htab: 618 lockdep_unregister_key(&htab->lockdep_key); 619 bpf_map_area_free(htab); 620 return ERR_PTR(err); 621 } 622 623 static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd) 624 { 625 if (likely(key_len % 4 == 0)) 626 return jhash2(key, key_len / 4, hashrnd); 627 return jhash(key, key_len, hashrnd); 628 } 629 630 static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash) 631 { 632 return &htab->buckets[hash & (htab->n_buckets - 1)]; 633 } 634 635 static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash) 636 { 637 return &__select_bucket(htab, hash)->head; 638 } 639 640 /* this lookup function can only be called with bucket lock taken */ 641 static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash, 642 void *key, u32 key_size) 643 { 644 struct hlist_nulls_node *n; 645 struct htab_elem *l; 646 647 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) 648 if (l->hash == hash && !memcmp(&l->key, key, key_size)) 649 return l; 650 651 return NULL; 652 } 653 654 /* can be called without bucket lock. it will repeat the loop in 655 * the unlikely event when elements moved from one bucket into another 656 * while link list is being walked 657 */ 658 static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head, 659 u32 hash, void *key, 660 u32 key_size, u32 n_buckets) 661 { 662 struct hlist_nulls_node *n; 663 struct htab_elem *l; 664 665 again: 666 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) 667 if (l->hash == hash && !memcmp(&l->key, key, key_size)) 668 return l; 669 670 if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1)))) 671 goto again; 672 673 return NULL; 674 } 675 676 /* Called from syscall or from eBPF program directly, so 677 * arguments have to match bpf_map_lookup_elem() exactly. 678 * The return value is adjusted by BPF instructions 679 * in htab_map_gen_lookup(). 
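 *
 * BPF program side sketch (illustrative; assumes a hash map "my_hash"
 * with u32 keys and u64 values defined elsewhere):
 *
 *   __u32 key = 0;
 *   __u64 *val = bpf_map_lookup_elem(&my_hash, &key);
 *
 *   if (val)
 *           __sync_fetch_and_add(val, 1);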
680 */ 681 static void *__htab_map_lookup_elem(struct bpf_map *map, void *key) 682 { 683 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 684 struct hlist_nulls_head *head; 685 struct htab_elem *l; 686 u32 hash, key_size; 687 688 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 689 !rcu_read_lock_bh_held()); 690 691 key_size = map->key_size; 692 693 hash = htab_map_hash(key, key_size, htab->hashrnd); 694 695 head = select_bucket(htab, hash); 696 697 l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); 698 699 return l; 700 } 701 702 static void *htab_map_lookup_elem(struct bpf_map *map, void *key) 703 { 704 struct htab_elem *l = __htab_map_lookup_elem(map, key); 705 706 if (l) 707 return l->key + round_up(map->key_size, 8); 708 709 return NULL; 710 } 711 712 /* inline bpf_map_lookup_elem() call. 713 * Instead of: 714 * bpf_prog 715 * bpf_map_lookup_elem 716 * map->ops->map_lookup_elem 717 * htab_map_lookup_elem 718 * __htab_map_lookup_elem 719 * do: 720 * bpf_prog 721 * __htab_map_lookup_elem 722 */ 723 static int htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) 724 { 725 struct bpf_insn *insn = insn_buf; 726 const int ret = BPF_REG_0; 727 728 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, 729 (void *(*)(struct bpf_map *map, void *key))NULL)); 730 *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); 731 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1); 732 *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, 733 offsetof(struct htab_elem, key) + 734 round_up(map->key_size, 8)); 735 return insn - insn_buf; 736 } 737 738 static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map, 739 void *key, const bool mark) 740 { 741 struct htab_elem *l = __htab_map_lookup_elem(map, key); 742 743 if (l) { 744 if (mark) 745 bpf_lru_node_set_ref(&l->lru_node); 746 return l->key + round_up(map->key_size, 8); 747 } 748 749 return NULL; 750 } 751 752 static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key) 753 { 754 return __htab_lru_map_lookup_elem(map, key, true); 755 } 756 757 static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key) 758 { 759 return __htab_lru_map_lookup_elem(map, key, false); 760 } 761 762 static int htab_lru_map_gen_lookup(struct bpf_map *map, 763 struct bpf_insn *insn_buf) 764 { 765 struct bpf_insn *insn = insn_buf; 766 const int ret = BPF_REG_0; 767 const int ref_reg = BPF_REG_1; 768 769 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, 770 (void *(*)(struct bpf_map *map, void *key))NULL)); 771 *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); 772 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4); 773 *insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret, 774 offsetof(struct htab_elem, lru_node) + 775 offsetof(struct bpf_lru_node, ref)); 776 *insn++ = BPF_JMP_IMM(BPF_JNE, ref_reg, 0, 1); 777 *insn++ = BPF_ST_MEM(BPF_B, ret, 778 offsetof(struct htab_elem, lru_node) + 779 offsetof(struct bpf_lru_node, ref), 780 1); 781 *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, 782 offsetof(struct htab_elem, key) + 783 round_up(map->key_size, 8)); 784 return insn - insn_buf; 785 } 786 787 static void check_and_free_fields(struct bpf_htab *htab, 788 struct htab_elem *elem) 789 { 790 if (htab_is_percpu(htab)) { 791 void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size); 792 int cpu; 793 794 for_each_possible_cpu(cpu) 795 bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu)); 796 } else { 797 void *map_value = elem->key + round_up(htab->map.key_size, 8); 798 799 bpf_obj_free_fields(htab->map.record, map_value); 800 } 
801 } 802 803 /* It is called from the bpf_lru_list when the LRU needs to delete 804 * older elements from the htab. 805 */ 806 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) 807 { 808 struct bpf_htab *htab = arg; 809 struct htab_elem *l = NULL, *tgt_l; 810 struct hlist_nulls_head *head; 811 struct hlist_nulls_node *n; 812 unsigned long flags; 813 struct bucket *b; 814 int ret; 815 816 tgt_l = container_of(node, struct htab_elem, lru_node); 817 b = __select_bucket(htab, tgt_l->hash); 818 head = &b->head; 819 820 ret = htab_lock_bucket(htab, b, tgt_l->hash, &flags); 821 if (ret) 822 return false; 823 824 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) 825 if (l == tgt_l) { 826 hlist_nulls_del_rcu(&l->hash_node); 827 check_and_free_fields(htab, l); 828 bpf_map_dec_elem_count(&htab->map); 829 break; 830 } 831 832 htab_unlock_bucket(htab, b, tgt_l->hash, flags); 833 834 return l == tgt_l; 835 } 836 837 /* Called from syscall */ 838 static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) 839 { 840 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 841 struct hlist_nulls_head *head; 842 struct htab_elem *l, *next_l; 843 u32 hash, key_size; 844 int i = 0; 845 846 WARN_ON_ONCE(!rcu_read_lock_held()); 847 848 key_size = map->key_size; 849 850 if (!key) 851 goto find_first_elem; 852 853 hash = htab_map_hash(key, key_size, htab->hashrnd); 854 855 head = select_bucket(htab, hash); 856 857 /* lookup the key */ 858 l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); 859 860 if (!l) 861 goto find_first_elem; 862 863 /* key was found, get next key in the same bucket */ 864 next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)), 865 struct htab_elem, hash_node); 866 867 if (next_l) { 868 /* if next elem in this hash list is non-zero, just return it */ 869 memcpy(next_key, next_l->key, key_size); 870 return 0; 871 } 872 873 /* no more elements in this hash list, go to the next bucket */ 874 i = hash & (htab->n_buckets - 1); 875 i++; 876 877 find_first_elem: 878 /* iterate over buckets */ 879 for (; i < htab->n_buckets; i++) { 880 head = select_bucket(htab, i); 881 882 /* pick first element in the bucket */ 883 next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)), 884 struct htab_elem, hash_node); 885 if (next_l) { 886 /* if it's not empty, just return it */ 887 memcpy(next_key, next_l->key, key_size); 888 return 0; 889 } 890 } 891 892 /* iterated over all buckets and all elements */ 893 return -ENOENT; 894 } 895 896 static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l) 897 { 898 check_and_free_fields(htab, l); 899 if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) 900 bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr); 901 bpf_mem_cache_free(&htab->ma, l); 902 } 903 904 static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l) 905 { 906 struct bpf_map *map = &htab->map; 907 void *ptr; 908 909 if (map->ops->map_fd_put_ptr) { 910 ptr = fd_htab_map_get_ptr(map, l); 911 map->ops->map_fd_put_ptr(map, ptr, true); 912 } 913 } 914 915 static bool is_map_full(struct bpf_htab *htab) 916 { 917 if (htab->use_percpu_counter) 918 return __percpu_counter_compare(&htab->pcount, htab->map.max_entries, 919 PERCPU_COUNTER_BATCH) >= 0; 920 return atomic_read(&htab->count) >= htab->map.max_entries; 921 } 922 923 static void inc_elem_count(struct bpf_htab *htab) 924 { 925 bpf_map_inc_elem_count(&htab->map); 926 927 if (htab->use_percpu_counter) 
928 percpu_counter_add_batch(&htab->pcount, 1, PERCPU_COUNTER_BATCH); 929 else 930 atomic_inc(&htab->count); 931 } 932 933 static void dec_elem_count(struct bpf_htab *htab) 934 { 935 bpf_map_dec_elem_count(&htab->map); 936 937 if (htab->use_percpu_counter) 938 percpu_counter_add_batch(&htab->pcount, -1, PERCPU_COUNTER_BATCH); 939 else 940 atomic_dec(&htab->count); 941 } 942 943 944 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) 945 { 946 htab_put_fd_value(htab, l); 947 948 if (htab_is_prealloc(htab)) { 949 bpf_map_dec_elem_count(&htab->map); 950 check_and_free_fields(htab, l); 951 __pcpu_freelist_push(&htab->freelist, &l->fnode); 952 } else { 953 dec_elem_count(htab); 954 htab_elem_free(htab, l); 955 } 956 } 957 958 static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr, 959 void *value, bool onallcpus) 960 { 961 if (!onallcpus) { 962 /* copy true value_size bytes */ 963 copy_map_value(&htab->map, this_cpu_ptr(pptr), value); 964 } else { 965 u32 size = round_up(htab->map.value_size, 8); 966 int off = 0, cpu; 967 968 for_each_possible_cpu(cpu) { 969 copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value + off); 970 off += size; 971 } 972 } 973 } 974 975 static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr, 976 void *value, bool onallcpus) 977 { 978 /* When not setting the initial value on all cpus, zero-fill element 979 * values for other cpus. Otherwise, bpf program has no way to ensure 980 * known initial values for cpus other than current one 981 * (onallcpus=false always when coming from bpf prog). 982 */ 983 if (!onallcpus) { 984 int current_cpu = raw_smp_processor_id(); 985 int cpu; 986 987 for_each_possible_cpu(cpu) { 988 if (cpu == current_cpu) 989 copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value); 990 else /* Since elem is preallocated, we cannot touch special fields */ 991 zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu)); 992 } 993 } else { 994 pcpu_copy_value(htab, pptr, value, onallcpus); 995 } 996 } 997 998 static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab) 999 { 1000 return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS && 1001 BITS_PER_LONG == 64; 1002 } 1003 1004 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, 1005 void *value, u32 key_size, u32 hash, 1006 bool percpu, bool onallcpus, 1007 struct htab_elem *old_elem) 1008 { 1009 u32 size = htab->map.value_size; 1010 bool prealloc = htab_is_prealloc(htab); 1011 struct htab_elem *l_new, **pl_new; 1012 void __percpu *pptr; 1013 1014 if (prealloc) { 1015 if (old_elem) { 1016 /* if we're updating the existing element, 1017 * use per-cpu extra elems to avoid freelist_pop/push 1018 */ 1019 pl_new = this_cpu_ptr(htab->extra_elems); 1020 l_new = *pl_new; 1021 htab_put_fd_value(htab, old_elem); 1022 *pl_new = old_elem; 1023 } else { 1024 struct pcpu_freelist_node *l; 1025 1026 l = __pcpu_freelist_pop(&htab->freelist); 1027 if (!l) 1028 return ERR_PTR(-E2BIG); 1029 l_new = container_of(l, struct htab_elem, fnode); 1030 bpf_map_inc_elem_count(&htab->map); 1031 } 1032 } else { 1033 if (is_map_full(htab)) 1034 if (!old_elem) 1035 /* when map is full and update() is replacing 1036 * old element, it's ok to allocate, since 1037 * old element will be freed immediately. 
1038 * Otherwise return an error 1039 */ 1040 return ERR_PTR(-E2BIG); 1041 inc_elem_count(htab); 1042 l_new = bpf_mem_cache_alloc(&htab->ma); 1043 if (!l_new) { 1044 l_new = ERR_PTR(-ENOMEM); 1045 goto dec_count; 1046 } 1047 } 1048 1049 memcpy(l_new->key, key, key_size); 1050 if (percpu) { 1051 if (prealloc) { 1052 pptr = htab_elem_get_ptr(l_new, key_size); 1053 } else { 1054 /* alloc_percpu zero-fills */ 1055 pptr = bpf_mem_cache_alloc(&htab->pcpu_ma); 1056 if (!pptr) { 1057 bpf_mem_cache_free(&htab->ma, l_new); 1058 l_new = ERR_PTR(-ENOMEM); 1059 goto dec_count; 1060 } 1061 l_new->ptr_to_pptr = pptr; 1062 pptr = *(void **)pptr; 1063 } 1064 1065 pcpu_init_value(htab, pptr, value, onallcpus); 1066 1067 if (!prealloc) 1068 htab_elem_set_ptr(l_new, key_size, pptr); 1069 } else if (fd_htab_map_needs_adjust(htab)) { 1070 size = round_up(size, 8); 1071 memcpy(l_new->key + round_up(key_size, 8), value, size); 1072 } else { 1073 copy_map_value(&htab->map, 1074 l_new->key + round_up(key_size, 8), 1075 value); 1076 } 1077 1078 l_new->hash = hash; 1079 return l_new; 1080 dec_count: 1081 dec_elem_count(htab); 1082 return l_new; 1083 } 1084 1085 static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old, 1086 u64 map_flags) 1087 { 1088 if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST) 1089 /* elem already exists */ 1090 return -EEXIST; 1091 1092 if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST) 1093 /* elem doesn't exist, cannot update it */ 1094 return -ENOENT; 1095 1096 return 0; 1097 } 1098 1099 /* Called from syscall or from eBPF program */ 1100 static long htab_map_update_elem(struct bpf_map *map, void *key, void *value, 1101 u64 map_flags) 1102 { 1103 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1104 struct htab_elem *l_new = NULL, *l_old; 1105 struct hlist_nulls_head *head; 1106 unsigned long flags; 1107 struct bucket *b; 1108 u32 key_size, hash; 1109 int ret; 1110 1111 if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST)) 1112 /* unknown flags */ 1113 return -EINVAL; 1114 1115 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 1116 !rcu_read_lock_bh_held()); 1117 1118 key_size = map->key_size; 1119 1120 hash = htab_map_hash(key, key_size, htab->hashrnd); 1121 1122 b = __select_bucket(htab, hash); 1123 head = &b->head; 1124 1125 if (unlikely(map_flags & BPF_F_LOCK)) { 1126 if (unlikely(!btf_record_has_field(map->record, BPF_SPIN_LOCK))) 1127 return -EINVAL; 1128 /* find an element without taking the bucket lock */ 1129 l_old = lookup_nulls_elem_raw(head, hash, key, key_size, 1130 htab->n_buckets); 1131 ret = check_flags(htab, l_old, map_flags); 1132 if (ret) 1133 return ret; 1134 if (l_old) { 1135 /* grab the element lock and update value in place */ 1136 copy_map_value_locked(map, 1137 l_old->key + round_up(key_size, 8), 1138 value, false); 1139 return 0; 1140 } 1141 /* fall through, grab the bucket lock and lookup again. 1142 * 99.9% chance that the element won't be found, 1143 * but second lookup under lock has to be done. 1144 */ 1145 } 1146 1147 ret = htab_lock_bucket(htab, b, hash, &flags); 1148 if (ret) 1149 return ret; 1150 1151 l_old = lookup_elem_raw(head, hash, key, key_size); 1152 1153 ret = check_flags(htab, l_old, map_flags); 1154 if (ret) 1155 goto err; 1156 1157 if (unlikely(l_old && (map_flags & BPF_F_LOCK))) { 1158 /* first lookup without the bucket lock didn't find the element, 1159 * but second lookup with the bucket lock found it. 
1160 * This case is highly unlikely, but has to be dealt with: 1161 * grab the element lock in addition to the bucket lock 1162 * and update element in place 1163 */ 1164 copy_map_value_locked(map, 1165 l_old->key + round_up(key_size, 8), 1166 value, false); 1167 ret = 0; 1168 goto err; 1169 } 1170 1171 l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false, 1172 l_old); 1173 if (IS_ERR(l_new)) { 1174 /* all pre-allocated elements are in use or memory exhausted */ 1175 ret = PTR_ERR(l_new); 1176 goto err; 1177 } 1178 1179 /* add new element to the head of the list, so that 1180 * concurrent search will find it before old elem 1181 */ 1182 hlist_nulls_add_head_rcu(&l_new->hash_node, head); 1183 if (l_old) { 1184 hlist_nulls_del_rcu(&l_old->hash_node); 1185 if (!htab_is_prealloc(htab)) 1186 free_htab_elem(htab, l_old); 1187 else 1188 check_and_free_fields(htab, l_old); 1189 } 1190 ret = 0; 1191 err: 1192 htab_unlock_bucket(htab, b, hash, flags); 1193 return ret; 1194 } 1195 1196 static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem) 1197 { 1198 check_and_free_fields(htab, elem); 1199 bpf_map_dec_elem_count(&htab->map); 1200 bpf_lru_push_free(&htab->lru, &elem->lru_node); 1201 } 1202 1203 static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, 1204 u64 map_flags) 1205 { 1206 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1207 struct htab_elem *l_new, *l_old = NULL; 1208 struct hlist_nulls_head *head; 1209 unsigned long flags; 1210 struct bucket *b; 1211 u32 key_size, hash; 1212 int ret; 1213 1214 if (unlikely(map_flags > BPF_EXIST)) 1215 /* unknown flags */ 1216 return -EINVAL; 1217 1218 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 1219 !rcu_read_lock_bh_held()); 1220 1221 key_size = map->key_size; 1222 1223 hash = htab_map_hash(key, key_size, htab->hashrnd); 1224 1225 b = __select_bucket(htab, hash); 1226 head = &b->head; 1227 1228 /* For LRU, we need to alloc before taking bucket's 1229 * spinlock because getting free nodes from LRU may need 1230 * to remove older elements from htab and this removal 1231 * operation will need a bucket lock. 
1232 */ 1233 l_new = prealloc_lru_pop(htab, key, hash); 1234 if (!l_new) 1235 return -ENOMEM; 1236 copy_map_value(&htab->map, 1237 l_new->key + round_up(map->key_size, 8), value); 1238 1239 ret = htab_lock_bucket(htab, b, hash, &flags); 1240 if (ret) 1241 goto err_lock_bucket; 1242 1243 l_old = lookup_elem_raw(head, hash, key, key_size); 1244 1245 ret = check_flags(htab, l_old, map_flags); 1246 if (ret) 1247 goto err; 1248 1249 /* add new element to the head of the list, so that 1250 * concurrent search will find it before old elem 1251 */ 1252 hlist_nulls_add_head_rcu(&l_new->hash_node, head); 1253 if (l_old) { 1254 bpf_lru_node_set_ref(&l_new->lru_node); 1255 hlist_nulls_del_rcu(&l_old->hash_node); 1256 } 1257 ret = 0; 1258 1259 err: 1260 htab_unlock_bucket(htab, b, hash, flags); 1261 1262 err_lock_bucket: 1263 if (ret) 1264 htab_lru_push_free(htab, l_new); 1265 else if (l_old) 1266 htab_lru_push_free(htab, l_old); 1267 1268 return ret; 1269 } 1270 1271 static long __htab_percpu_map_update_elem(struct bpf_map *map, void *key, 1272 void *value, u64 map_flags, 1273 bool onallcpus) 1274 { 1275 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1276 struct htab_elem *l_new = NULL, *l_old; 1277 struct hlist_nulls_head *head; 1278 unsigned long flags; 1279 struct bucket *b; 1280 u32 key_size, hash; 1281 int ret; 1282 1283 if (unlikely(map_flags > BPF_EXIST)) 1284 /* unknown flags */ 1285 return -EINVAL; 1286 1287 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 1288 !rcu_read_lock_bh_held()); 1289 1290 key_size = map->key_size; 1291 1292 hash = htab_map_hash(key, key_size, htab->hashrnd); 1293 1294 b = __select_bucket(htab, hash); 1295 head = &b->head; 1296 1297 ret = htab_lock_bucket(htab, b, hash, &flags); 1298 if (ret) 1299 return ret; 1300 1301 l_old = lookup_elem_raw(head, hash, key, key_size); 1302 1303 ret = check_flags(htab, l_old, map_flags); 1304 if (ret) 1305 goto err; 1306 1307 if (l_old) { 1308 /* per-cpu hash map can update value in-place */ 1309 pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size), 1310 value, onallcpus); 1311 } else { 1312 l_new = alloc_htab_elem(htab, key, value, key_size, 1313 hash, true, onallcpus, NULL); 1314 if (IS_ERR(l_new)) { 1315 ret = PTR_ERR(l_new); 1316 goto err; 1317 } 1318 hlist_nulls_add_head_rcu(&l_new->hash_node, head); 1319 } 1320 ret = 0; 1321 err: 1322 htab_unlock_bucket(htab, b, hash, flags); 1323 return ret; 1324 } 1325 1326 static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, 1327 void *value, u64 map_flags, 1328 bool onallcpus) 1329 { 1330 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1331 struct htab_elem *l_new = NULL, *l_old; 1332 struct hlist_nulls_head *head; 1333 unsigned long flags; 1334 struct bucket *b; 1335 u32 key_size, hash; 1336 int ret; 1337 1338 if (unlikely(map_flags > BPF_EXIST)) 1339 /* unknown flags */ 1340 return -EINVAL; 1341 1342 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 1343 !rcu_read_lock_bh_held()); 1344 1345 key_size = map->key_size; 1346 1347 hash = htab_map_hash(key, key_size, htab->hashrnd); 1348 1349 b = __select_bucket(htab, hash); 1350 head = &b->head; 1351 1352 /* For LRU, we need to alloc before taking bucket's 1353 * spinlock because LRU's elem alloc may need 1354 * to remove older elem from htab and this removal 1355 * operation will need a bucket lock. 
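	 *
	 * Note (descriptive): the pop below is skipped for BPF_EXIST-only
	 * updates, because in that case the element is either updated in
	 * place or the update fails with -ENOENT, so no new node is needed.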
1356 */ 1357 if (map_flags != BPF_EXIST) { 1358 l_new = prealloc_lru_pop(htab, key, hash); 1359 if (!l_new) 1360 return -ENOMEM; 1361 } 1362 1363 ret = htab_lock_bucket(htab, b, hash, &flags); 1364 if (ret) 1365 goto err_lock_bucket; 1366 1367 l_old = lookup_elem_raw(head, hash, key, key_size); 1368 1369 ret = check_flags(htab, l_old, map_flags); 1370 if (ret) 1371 goto err; 1372 1373 if (l_old) { 1374 bpf_lru_node_set_ref(&l_old->lru_node); 1375 1376 /* per-cpu hash map can update value in-place */ 1377 pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size), 1378 value, onallcpus); 1379 } else { 1380 pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size), 1381 value, onallcpus); 1382 hlist_nulls_add_head_rcu(&l_new->hash_node, head); 1383 l_new = NULL; 1384 } 1385 ret = 0; 1386 err: 1387 htab_unlock_bucket(htab, b, hash, flags); 1388 err_lock_bucket: 1389 if (l_new) { 1390 bpf_map_dec_elem_count(&htab->map); 1391 bpf_lru_push_free(&htab->lru, &l_new->lru_node); 1392 } 1393 return ret; 1394 } 1395 1396 static long htab_percpu_map_update_elem(struct bpf_map *map, void *key, 1397 void *value, u64 map_flags) 1398 { 1399 return __htab_percpu_map_update_elem(map, key, value, map_flags, false); 1400 } 1401 1402 static long htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, 1403 void *value, u64 map_flags) 1404 { 1405 return __htab_lru_percpu_map_update_elem(map, key, value, map_flags, 1406 false); 1407 } 1408 1409 /* Called from syscall or from eBPF program */ 1410 static long htab_map_delete_elem(struct bpf_map *map, void *key) 1411 { 1412 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1413 struct hlist_nulls_head *head; 1414 struct bucket *b; 1415 struct htab_elem *l; 1416 unsigned long flags; 1417 u32 hash, key_size; 1418 int ret; 1419 1420 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 1421 !rcu_read_lock_bh_held()); 1422 1423 key_size = map->key_size; 1424 1425 hash = htab_map_hash(key, key_size, htab->hashrnd); 1426 b = __select_bucket(htab, hash); 1427 head = &b->head; 1428 1429 ret = htab_lock_bucket(htab, b, hash, &flags); 1430 if (ret) 1431 return ret; 1432 1433 l = lookup_elem_raw(head, hash, key, key_size); 1434 1435 if (l) { 1436 hlist_nulls_del_rcu(&l->hash_node); 1437 free_htab_elem(htab, l); 1438 } else { 1439 ret = -ENOENT; 1440 } 1441 1442 htab_unlock_bucket(htab, b, hash, flags); 1443 return ret; 1444 } 1445 1446 static long htab_lru_map_delete_elem(struct bpf_map *map, void *key) 1447 { 1448 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1449 struct hlist_nulls_head *head; 1450 struct bucket *b; 1451 struct htab_elem *l; 1452 unsigned long flags; 1453 u32 hash, key_size; 1454 int ret; 1455 1456 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 1457 !rcu_read_lock_bh_held()); 1458 1459 key_size = map->key_size; 1460 1461 hash = htab_map_hash(key, key_size, htab->hashrnd); 1462 b = __select_bucket(htab, hash); 1463 head = &b->head; 1464 1465 ret = htab_lock_bucket(htab, b, hash, &flags); 1466 if (ret) 1467 return ret; 1468 1469 l = lookup_elem_raw(head, hash, key, key_size); 1470 1471 if (l) 1472 hlist_nulls_del_rcu(&l->hash_node); 1473 else 1474 ret = -ENOENT; 1475 1476 htab_unlock_bucket(htab, b, hash, flags); 1477 if (l) 1478 htab_lru_push_free(htab, l); 1479 return ret; 1480 } 1481 1482 static void delete_all_elements(struct bpf_htab *htab) 1483 { 1484 int i; 1485 1486 /* It's called from a worker thread, so disable migration here, 1487 * since bpf_mem_cache_free() relies on that. 
1488 */ 1489 migrate_disable(); 1490 for (i = 0; i < htab->n_buckets; i++) { 1491 struct hlist_nulls_head *head = select_bucket(htab, i); 1492 struct hlist_nulls_node *n; 1493 struct htab_elem *l; 1494 1495 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { 1496 hlist_nulls_del_rcu(&l->hash_node); 1497 htab_elem_free(htab, l); 1498 } 1499 cond_resched(); 1500 } 1501 migrate_enable(); 1502 } 1503 1504 static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab) 1505 { 1506 int i; 1507 1508 rcu_read_lock(); 1509 for (i = 0; i < htab->n_buckets; i++) { 1510 struct hlist_nulls_head *head = select_bucket(htab, i); 1511 struct hlist_nulls_node *n; 1512 struct htab_elem *l; 1513 1514 hlist_nulls_for_each_entry(l, n, head, hash_node) { 1515 /* We only free timer on uref dropping to zero */ 1516 if (btf_record_has_field(htab->map.record, BPF_TIMER)) 1517 bpf_obj_free_timer(htab->map.record, 1518 l->key + round_up(htab->map.key_size, 8)); 1519 if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE)) 1520 bpf_obj_free_workqueue(htab->map.record, 1521 l->key + round_up(htab->map.key_size, 8)); 1522 } 1523 cond_resched_rcu(); 1524 } 1525 rcu_read_unlock(); 1526 } 1527 1528 static void htab_map_free_timers_and_wq(struct bpf_map *map) 1529 { 1530 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1531 1532 /* We only free timer and workqueue on uref dropping to zero */ 1533 if (btf_record_has_field(htab->map.record, BPF_TIMER | BPF_WORKQUEUE)) { 1534 if (!htab_is_prealloc(htab)) 1535 htab_free_malloced_timers_and_wq(htab); 1536 else 1537 htab_free_prealloced_timers_and_wq(htab); 1538 } 1539 } 1540 1541 /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ 1542 static void htab_map_free(struct bpf_map *map) 1543 { 1544 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1545 int i; 1546 1547 /* bpf_free_used_maps() or close(map_fd) will trigger this map_free callback. 1548 * bpf_free_used_maps() is called after bpf prog is no longer executing. 1549 * There is no need to synchronize_rcu() here to protect map elements. 1550 */ 1551 1552 /* htab no longer uses call_rcu() directly. bpf_mem_alloc does it 1553 * underneath and is responsible for waiting for callbacks to finish 1554 * during bpf_mem_alloc_destroy(). 
1555 */ 1556 if (!htab_is_prealloc(htab)) { 1557 delete_all_elements(htab); 1558 } else { 1559 htab_free_prealloced_fields(htab); 1560 prealloc_destroy(htab); 1561 } 1562 1563 bpf_map_free_elem_count(map); 1564 free_percpu(htab->extra_elems); 1565 bpf_map_area_free(htab->buckets); 1566 bpf_mem_alloc_destroy(&htab->pcpu_ma); 1567 bpf_mem_alloc_destroy(&htab->ma); 1568 if (htab->use_percpu_counter) 1569 percpu_counter_destroy(&htab->pcount); 1570 for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) 1571 free_percpu(htab->map_locked[i]); 1572 lockdep_unregister_key(&htab->lockdep_key); 1573 bpf_map_area_free(htab); 1574 } 1575 1576 static void htab_map_seq_show_elem(struct bpf_map *map, void *key, 1577 struct seq_file *m) 1578 { 1579 void *value; 1580 1581 rcu_read_lock(); 1582 1583 value = htab_map_lookup_elem(map, key); 1584 if (!value) { 1585 rcu_read_unlock(); 1586 return; 1587 } 1588 1589 btf_type_seq_show(map->btf, map->btf_key_type_id, key, m); 1590 seq_puts(m, ": "); 1591 btf_type_seq_show(map->btf, map->btf_value_type_id, value, m); 1592 seq_puts(m, "\n"); 1593 1594 rcu_read_unlock(); 1595 } 1596 1597 static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key, 1598 void *value, bool is_lru_map, 1599 bool is_percpu, u64 flags) 1600 { 1601 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1602 struct hlist_nulls_head *head; 1603 unsigned long bflags; 1604 struct htab_elem *l; 1605 u32 hash, key_size; 1606 struct bucket *b; 1607 int ret; 1608 1609 key_size = map->key_size; 1610 1611 hash = htab_map_hash(key, key_size, htab->hashrnd); 1612 b = __select_bucket(htab, hash); 1613 head = &b->head; 1614 1615 ret = htab_lock_bucket(htab, b, hash, &bflags); 1616 if (ret) 1617 return ret; 1618 1619 l = lookup_elem_raw(head, hash, key, key_size); 1620 if (!l) { 1621 ret = -ENOENT; 1622 } else { 1623 if (is_percpu) { 1624 u32 roundup_value_size = round_up(map->value_size, 8); 1625 void __percpu *pptr; 1626 int off = 0, cpu; 1627 1628 pptr = htab_elem_get_ptr(l, key_size); 1629 for_each_possible_cpu(cpu) { 1630 copy_map_value_long(&htab->map, value + off, per_cpu_ptr(pptr, cpu)); 1631 check_and_init_map_value(&htab->map, value + off); 1632 off += roundup_value_size; 1633 } 1634 } else { 1635 u32 roundup_key_size = round_up(map->key_size, 8); 1636 1637 if (flags & BPF_F_LOCK) 1638 copy_map_value_locked(map, value, l->key + 1639 roundup_key_size, 1640 true); 1641 else 1642 copy_map_value(map, value, l->key + 1643 roundup_key_size); 1644 /* Zeroing special fields in the temp buffer */ 1645 check_and_init_map_value(map, value); 1646 } 1647 1648 hlist_nulls_del_rcu(&l->hash_node); 1649 if (!is_lru_map) 1650 free_htab_elem(htab, l); 1651 } 1652 1653 htab_unlock_bucket(htab, b, hash, bflags); 1654 1655 if (is_lru_map && l) 1656 htab_lru_push_free(htab, l); 1657 1658 return ret; 1659 } 1660 1661 static int htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key, 1662 void *value, u64 flags) 1663 { 1664 return __htab_map_lookup_and_delete_elem(map, key, value, false, false, 1665 flags); 1666 } 1667 1668 static int htab_percpu_map_lookup_and_delete_elem(struct bpf_map *map, 1669 void *key, void *value, 1670 u64 flags) 1671 { 1672 return __htab_map_lookup_and_delete_elem(map, key, value, false, true, 1673 flags); 1674 } 1675 1676 static int htab_lru_map_lookup_and_delete_elem(struct bpf_map *map, void *key, 1677 void *value, u64 flags) 1678 { 1679 return __htab_map_lookup_and_delete_elem(map, key, value, true, false, 1680 flags); 1681 } 1682 1683 static int 
htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map, 1684 void *key, void *value, 1685 u64 flags) 1686 { 1687 return __htab_map_lookup_and_delete_elem(map, key, value, true, true, 1688 flags); 1689 } 1690 1691 static int 1692 __htab_map_lookup_and_delete_batch(struct bpf_map *map, 1693 const union bpf_attr *attr, 1694 union bpf_attr __user *uattr, 1695 bool do_delete, bool is_lru_map, 1696 bool is_percpu) 1697 { 1698 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1699 u32 bucket_cnt, total, key_size, value_size, roundup_key_size; 1700 void *keys = NULL, *values = NULL, *value, *dst_key, *dst_val; 1701 void __user *uvalues = u64_to_user_ptr(attr->batch.values); 1702 void __user *ukeys = u64_to_user_ptr(attr->batch.keys); 1703 void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch); 1704 u32 batch, max_count, size, bucket_size, map_id; 1705 struct htab_elem *node_to_free = NULL; 1706 u64 elem_map_flags, map_flags; 1707 struct hlist_nulls_head *head; 1708 struct hlist_nulls_node *n; 1709 unsigned long flags = 0; 1710 bool locked = false; 1711 struct htab_elem *l; 1712 struct bucket *b; 1713 int ret = 0; 1714 1715 elem_map_flags = attr->batch.elem_flags; 1716 if ((elem_map_flags & ~BPF_F_LOCK) || 1717 ((elem_map_flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))) 1718 return -EINVAL; 1719 1720 map_flags = attr->batch.flags; 1721 if (map_flags) 1722 return -EINVAL; 1723 1724 max_count = attr->batch.count; 1725 if (!max_count) 1726 return 0; 1727 1728 if (put_user(0, &uattr->batch.count)) 1729 return -EFAULT; 1730 1731 batch = 0; 1732 if (ubatch && copy_from_user(&batch, ubatch, sizeof(batch))) 1733 return -EFAULT; 1734 1735 if (batch >= htab->n_buckets) 1736 return -ENOENT; 1737 1738 key_size = htab->map.key_size; 1739 roundup_key_size = round_up(htab->map.key_size, 8); 1740 value_size = htab->map.value_size; 1741 size = round_up(value_size, 8); 1742 if (is_percpu) 1743 value_size = size * num_possible_cpus(); 1744 total = 0; 1745 /* while experimenting with hash tables with sizes ranging from 10 to 1746 * 1000, it was observed that a bucket can have up to 5 entries. 1747 */ 1748 bucket_size = 5; 1749 1750 alloc: 1751 /* We cannot do copy_from_user or copy_to_user inside 1752 * the rcu_read_lock. Allocate enough space here. 1753 */ 1754 keys = kvmalloc_array(key_size, bucket_size, GFP_USER | __GFP_NOWARN); 1755 values = kvmalloc_array(value_size, bucket_size, GFP_USER | __GFP_NOWARN); 1756 if (!keys || !values) { 1757 ret = -ENOMEM; 1758 goto after_loop; 1759 } 1760 1761 again: 1762 bpf_disable_instrumentation(); 1763 rcu_read_lock(); 1764 again_nocopy: 1765 dst_key = keys; 1766 dst_val = values; 1767 b = &htab->buckets[batch]; 1768 head = &b->head; 1769 /* do not grab the lock unless need it (bucket_cnt > 0). */ 1770 if (locked) { 1771 ret = htab_lock_bucket(htab, b, batch, &flags); 1772 if (ret) { 1773 rcu_read_unlock(); 1774 bpf_enable_instrumentation(); 1775 goto after_loop; 1776 } 1777 } 1778 1779 bucket_cnt = 0; 1780 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) 1781 bucket_cnt++; 1782 1783 if (bucket_cnt && !locked) { 1784 locked = true; 1785 goto again_nocopy; 1786 } 1787 1788 if (bucket_cnt > (max_count - total)) { 1789 if (total == 0) 1790 ret = -ENOSPC; 1791 /* Note that since bucket_cnt > 0 here, it is implicit 1792 * that the locked was grabbed, so release it. 
1793 */ 1794 htab_unlock_bucket(htab, b, batch, flags); 1795 rcu_read_unlock(); 1796 bpf_enable_instrumentation(); 1797 goto after_loop; 1798 } 1799 1800 if (bucket_cnt > bucket_size) { 1801 bucket_size = bucket_cnt; 1802 /* Note that since bucket_cnt > 0 here, it is implicit 1803 * that the locked was grabbed, so release it. 1804 */ 1805 htab_unlock_bucket(htab, b, batch, flags); 1806 rcu_read_unlock(); 1807 bpf_enable_instrumentation(); 1808 kvfree(keys); 1809 kvfree(values); 1810 goto alloc; 1811 } 1812 1813 /* Next block is only safe to run if you have grabbed the lock */ 1814 if (!locked) 1815 goto next_batch; 1816 1817 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { 1818 memcpy(dst_key, l->key, key_size); 1819 1820 if (is_percpu) { 1821 int off = 0, cpu; 1822 void __percpu *pptr; 1823 1824 pptr = htab_elem_get_ptr(l, map->key_size); 1825 for_each_possible_cpu(cpu) { 1826 copy_map_value_long(&htab->map, dst_val + off, per_cpu_ptr(pptr, cpu)); 1827 check_and_init_map_value(&htab->map, dst_val + off); 1828 off += size; 1829 } 1830 } else { 1831 value = l->key + roundup_key_size; 1832 if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { 1833 struct bpf_map **inner_map = value; 1834 1835 /* Actual value is the id of the inner map */ 1836 map_id = map->ops->map_fd_sys_lookup_elem(*inner_map); 1837 value = &map_id; 1838 } 1839 1840 if (elem_map_flags & BPF_F_LOCK) 1841 copy_map_value_locked(map, dst_val, value, 1842 true); 1843 else 1844 copy_map_value(map, dst_val, value); 1845 /* Zeroing special fields in the temp buffer */ 1846 check_and_init_map_value(map, dst_val); 1847 } 1848 if (do_delete) { 1849 hlist_nulls_del_rcu(&l->hash_node); 1850 1851 /* bpf_lru_push_free() will acquire lru_lock, which 1852 * may cause deadlock. See comments in function 1853 * prealloc_lru_pop(). Let us do bpf_lru_push_free() 1854 * after releasing the bucket lock. 1855 */ 1856 if (is_lru_map) { 1857 l->batch_flink = node_to_free; 1858 node_to_free = l; 1859 } else { 1860 free_htab_elem(htab, l); 1861 } 1862 } 1863 dst_key += key_size; 1864 dst_val += value_size; 1865 } 1866 1867 htab_unlock_bucket(htab, b, batch, flags); 1868 locked = false; 1869 1870 while (node_to_free) { 1871 l = node_to_free; 1872 node_to_free = node_to_free->batch_flink; 1873 htab_lru_push_free(htab, l); 1874 } 1875 1876 next_batch: 1877 /* If we are not copying data, we can go to next bucket and avoid 1878 * unlocking the rcu. 
1879 */ 1880 if (!bucket_cnt && (batch + 1 < htab->n_buckets)) { 1881 batch++; 1882 goto again_nocopy; 1883 } 1884 1885 rcu_read_unlock(); 1886 bpf_enable_instrumentation(); 1887 if (bucket_cnt && (copy_to_user(ukeys + total * key_size, keys, 1888 key_size * bucket_cnt) || 1889 copy_to_user(uvalues + total * value_size, values, 1890 value_size * bucket_cnt))) { 1891 ret = -EFAULT; 1892 goto after_loop; 1893 } 1894 1895 total += bucket_cnt; 1896 batch++; 1897 if (batch >= htab->n_buckets) { 1898 ret = -ENOENT; 1899 goto after_loop; 1900 } 1901 goto again; 1902 1903 after_loop: 1904 if (ret == -EFAULT) 1905 goto out; 1906 1907 /* copy # of entries and next batch */ 1908 ubatch = u64_to_user_ptr(attr->batch.out_batch); 1909 if (copy_to_user(ubatch, &batch, sizeof(batch)) || 1910 put_user(total, &uattr->batch.count)) 1911 ret = -EFAULT; 1912 1913 out: 1914 kvfree(keys); 1915 kvfree(values); 1916 return ret; 1917 } 1918 1919 static int 1920 htab_percpu_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, 1921 union bpf_attr __user *uattr) 1922 { 1923 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false, 1924 false, true); 1925 } 1926 1927 static int 1928 htab_percpu_map_lookup_and_delete_batch(struct bpf_map *map, 1929 const union bpf_attr *attr, 1930 union bpf_attr __user *uattr) 1931 { 1932 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true, 1933 false, true); 1934 } 1935 1936 static int 1937 htab_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, 1938 union bpf_attr __user *uattr) 1939 { 1940 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false, 1941 false, false); 1942 } 1943 1944 static int 1945 htab_map_lookup_and_delete_batch(struct bpf_map *map, 1946 const union bpf_attr *attr, 1947 union bpf_attr __user *uattr) 1948 { 1949 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true, 1950 false, false); 1951 } 1952 1953 static int 1954 htab_lru_percpu_map_lookup_batch(struct bpf_map *map, 1955 const union bpf_attr *attr, 1956 union bpf_attr __user *uattr) 1957 { 1958 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false, 1959 true, true); 1960 } 1961 1962 static int 1963 htab_lru_percpu_map_lookup_and_delete_batch(struct bpf_map *map, 1964 const union bpf_attr *attr, 1965 union bpf_attr __user *uattr) 1966 { 1967 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true, 1968 true, true); 1969 } 1970 1971 static int 1972 htab_lru_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, 1973 union bpf_attr __user *uattr) 1974 { 1975 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false, 1976 true, false); 1977 } 1978 1979 static int 1980 htab_lru_map_lookup_and_delete_batch(struct bpf_map *map, 1981 const union bpf_attr *attr, 1982 union bpf_attr __user *uattr) 1983 { 1984 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true, 1985 true, false); 1986 } 1987 1988 struct bpf_iter_seq_hash_map_info { 1989 struct bpf_map *map; 1990 struct bpf_htab *htab; 1991 void *percpu_value_buf; // non-zero means percpu hash 1992 u32 bucket_id; 1993 u32 skip_elems; 1994 }; 1995 1996 static struct htab_elem * 1997 bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info, 1998 struct htab_elem *prev_elem) 1999 { 2000 const struct bpf_htab *htab = info->htab; 2001 u32 skip_elems = info->skip_elems; 2002 u32 bucket_id = info->bucket_id; 2003 struct hlist_nulls_head *head; 2004 struct hlist_nulls_node *n; 2005 struct htab_elem *elem; 2006 struct bucket *b; 2007 u32 i, count; 
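
	/* Descriptive note: bucket_id and skip_elems record how far the
	 * previous walk got. If the bucket was modified since the last
	 * call, skipping 'skip_elems' entries may not land on exactly the
	 * same element as before; the iterator trades exactness for
	 * forward progress in that case.
	 */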
2008 2009 if (bucket_id >= htab->n_buckets) 2010 return NULL; 2011 2012 /* try to find next elem in the same bucket */ 2013 if (prev_elem) { 2014 /* no update/deletion on this bucket, prev_elem should be still valid 2015 * and we won't skip elements. 2016 */ 2017 n = rcu_dereference_raw(hlist_nulls_next_rcu(&prev_elem->hash_node)); 2018 elem = hlist_nulls_entry_safe(n, struct htab_elem, hash_node); 2019 if (elem) 2020 return elem; 2021 2022 /* not found, unlock and go to the next bucket */ 2023 b = &htab->buckets[bucket_id++]; 2024 rcu_read_unlock(); 2025 skip_elems = 0; 2026 } 2027 2028 for (i = bucket_id; i < htab->n_buckets; i++) { 2029 b = &htab->buckets[i]; 2030 rcu_read_lock(); 2031 2032 count = 0; 2033 head = &b->head; 2034 hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) { 2035 if (count >= skip_elems) { 2036 info->bucket_id = i; 2037 info->skip_elems = count; 2038 return elem; 2039 } 2040 count++; 2041 } 2042 2043 rcu_read_unlock(); 2044 skip_elems = 0; 2045 } 2046 2047 info->bucket_id = i; 2048 info->skip_elems = 0; 2049 return NULL; 2050 } 2051 2052 static void *bpf_hash_map_seq_start(struct seq_file *seq, loff_t *pos) 2053 { 2054 struct bpf_iter_seq_hash_map_info *info = seq->private; 2055 struct htab_elem *elem; 2056 2057 elem = bpf_hash_map_seq_find_next(info, NULL); 2058 if (!elem) 2059 return NULL; 2060 2061 if (*pos == 0) 2062 ++*pos; 2063 return elem; 2064 } 2065 2066 static void *bpf_hash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2067 { 2068 struct bpf_iter_seq_hash_map_info *info = seq->private; 2069 2070 ++*pos; 2071 ++info->skip_elems; 2072 return bpf_hash_map_seq_find_next(info, v); 2073 } 2074 2075 static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem) 2076 { 2077 struct bpf_iter_seq_hash_map_info *info = seq->private; 2078 u32 roundup_key_size, roundup_value_size; 2079 struct bpf_iter__bpf_map_elem ctx = {}; 2080 struct bpf_map *map = info->map; 2081 struct bpf_iter_meta meta; 2082 int ret = 0, off = 0, cpu; 2083 struct bpf_prog *prog; 2084 void __percpu *pptr; 2085 2086 meta.seq = seq; 2087 prog = bpf_iter_get_info(&meta, elem == NULL); 2088 if (prog) { 2089 ctx.meta = &meta; 2090 ctx.map = info->map; 2091 if (elem) { 2092 roundup_key_size = round_up(map->key_size, 8); 2093 ctx.key = elem->key; 2094 if (!info->percpu_value_buf) { 2095 ctx.value = elem->key + roundup_key_size; 2096 } else { 2097 roundup_value_size = round_up(map->value_size, 8); 2098 pptr = htab_elem_get_ptr(elem, map->key_size); 2099 for_each_possible_cpu(cpu) { 2100 copy_map_value_long(map, info->percpu_value_buf + off, 2101 per_cpu_ptr(pptr, cpu)); 2102 check_and_init_map_value(map, info->percpu_value_buf + off); 2103 off += roundup_value_size; 2104 } 2105 ctx.value = info->percpu_value_buf; 2106 } 2107 } 2108 ret = bpf_iter_run_prog(prog, &ctx); 2109 } 2110 2111 return ret; 2112 } 2113 2114 static int bpf_hash_map_seq_show(struct seq_file *seq, void *v) 2115 { 2116 return __bpf_hash_map_seq_show(seq, v); 2117 } 2118 2119 static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v) 2120 { 2121 if (!v) 2122 (void)__bpf_hash_map_seq_show(seq, NULL); 2123 else 2124 rcu_read_unlock(); 2125 } 2126 2127 static int bpf_iter_init_hash_map(void *priv_data, 2128 struct bpf_iter_aux_info *aux) 2129 { 2130 struct bpf_iter_seq_hash_map_info *seq_info = priv_data; 2131 struct bpf_map *map = aux->map; 2132 void *value_buf; 2133 u32 buf_size; 2134 2135 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 2136 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 2137 
buf_size = round_up(map->value_size, 8) * num_possible_cpus(); 2138 value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN); 2139 if (!value_buf) 2140 return -ENOMEM; 2141 2142 seq_info->percpu_value_buf = value_buf; 2143 } 2144 2145 bpf_map_inc_with_uref(map); 2146 seq_info->map = map; 2147 seq_info->htab = container_of(map, struct bpf_htab, map); 2148 return 0; 2149 } 2150 2151 static void bpf_iter_fini_hash_map(void *priv_data) 2152 { 2153 struct bpf_iter_seq_hash_map_info *seq_info = priv_data; 2154 2155 bpf_map_put_with_uref(seq_info->map); 2156 kfree(seq_info->percpu_value_buf); 2157 } 2158 2159 static const struct seq_operations bpf_hash_map_seq_ops = { 2160 .start = bpf_hash_map_seq_start, 2161 .next = bpf_hash_map_seq_next, 2162 .stop = bpf_hash_map_seq_stop, 2163 .show = bpf_hash_map_seq_show, 2164 }; 2165 2166 static const struct bpf_iter_seq_info iter_seq_info = { 2167 .seq_ops = &bpf_hash_map_seq_ops, 2168 .init_seq_private = bpf_iter_init_hash_map, 2169 .fini_seq_private = bpf_iter_fini_hash_map, 2170 .seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info), 2171 }; 2172 2173 static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_fn, 2174 void *callback_ctx, u64 flags) 2175 { 2176 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 2177 struct hlist_nulls_head *head; 2178 struct hlist_nulls_node *n; 2179 struct htab_elem *elem; 2180 u32 roundup_key_size; 2181 int i, num_elems = 0; 2182 void __percpu *pptr; 2183 struct bucket *b; 2184 void *key, *val; 2185 bool is_percpu; 2186 u64 ret = 0; 2187 2188 if (flags != 0) 2189 return -EINVAL; 2190 2191 is_percpu = htab_is_percpu(htab); 2192 2193 roundup_key_size = round_up(map->key_size, 8); 2194 /* disable migration so percpu value prepared here will be the 2195 * same as the one seen by the bpf program with bpf_map_lookup_elem(). 
2196 */ 2197 if (is_percpu) 2198 migrate_disable(); 2199 for (i = 0; i < htab->n_buckets; i++) { 2200 b = &htab->buckets[i]; 2201 rcu_read_lock(); 2202 head = &b->head; 2203 hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) { 2204 key = elem->key; 2205 if (is_percpu) { 2206 /* current cpu value for percpu map */ 2207 pptr = htab_elem_get_ptr(elem, map->key_size); 2208 val = this_cpu_ptr(pptr); 2209 } else { 2210 val = elem->key + roundup_key_size; 2211 } 2212 num_elems++; 2213 ret = callback_fn((u64)(long)map, (u64)(long)key, 2214 (u64)(long)val, (u64)(long)callback_ctx, 0); 2215 /* return value: 0 - continue, 1 - stop and return */ 2216 if (ret) { 2217 rcu_read_unlock(); 2218 goto out; 2219 } 2220 } 2221 rcu_read_unlock(); 2222 } 2223 out: 2224 if (is_percpu) 2225 migrate_enable(); 2226 return num_elems; 2227 } 2228 2229 static u64 htab_map_mem_usage(const struct bpf_map *map) 2230 { 2231 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 2232 u32 value_size = round_up(htab->map.value_size, 8); 2233 bool prealloc = htab_is_prealloc(htab); 2234 bool percpu = htab_is_percpu(htab); 2235 bool lru = htab_is_lru(htab); 2236 u64 num_entries; 2237 u64 usage = sizeof(struct bpf_htab); 2238 2239 usage += sizeof(struct bucket) * htab->n_buckets; 2240 usage += sizeof(int) * num_possible_cpus() * HASHTAB_MAP_LOCK_COUNT; 2241 if (prealloc) { 2242 num_entries = map->max_entries; 2243 if (htab_has_extra_elems(htab)) 2244 num_entries += num_possible_cpus(); 2245 2246 usage += htab->elem_size * num_entries; 2247 2248 if (percpu) 2249 usage += value_size * num_possible_cpus() * num_entries; 2250 else if (!lru) 2251 usage += sizeof(struct htab_elem *) * num_possible_cpus(); 2252 } else { 2253 #define LLIST_NODE_SZ sizeof(struct llist_node) 2254 2255 num_entries = htab->use_percpu_counter ? 
2256 percpu_counter_sum(&htab->pcount) : 2257 atomic_read(&htab->count); 2258 usage += (htab->elem_size + LLIST_NODE_SZ) * num_entries; 2259 if (percpu) { 2260 usage += (LLIST_NODE_SZ + sizeof(void *)) * num_entries; 2261 usage += value_size * num_possible_cpus() * num_entries; 2262 } 2263 } 2264 return usage; 2265 } 2266 2267 BTF_ID_LIST_SINGLE(htab_map_btf_ids, struct, bpf_htab) 2268 const struct bpf_map_ops htab_map_ops = { 2269 .map_meta_equal = bpf_map_meta_equal, 2270 .map_alloc_check = htab_map_alloc_check, 2271 .map_alloc = htab_map_alloc, 2272 .map_free = htab_map_free, 2273 .map_get_next_key = htab_map_get_next_key, 2274 .map_release_uref = htab_map_free_timers_and_wq, 2275 .map_lookup_elem = htab_map_lookup_elem, 2276 .map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem, 2277 .map_update_elem = htab_map_update_elem, 2278 .map_delete_elem = htab_map_delete_elem, 2279 .map_gen_lookup = htab_map_gen_lookup, 2280 .map_seq_show_elem = htab_map_seq_show_elem, 2281 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2282 .map_for_each_callback = bpf_for_each_hash_elem, 2283 .map_mem_usage = htab_map_mem_usage, 2284 BATCH_OPS(htab), 2285 .map_btf_id = &htab_map_btf_ids[0], 2286 .iter_seq_info = &iter_seq_info, 2287 }; 2288 2289 const struct bpf_map_ops htab_lru_map_ops = { 2290 .map_meta_equal = bpf_map_meta_equal, 2291 .map_alloc_check = htab_map_alloc_check, 2292 .map_alloc = htab_map_alloc, 2293 .map_free = htab_map_free, 2294 .map_get_next_key = htab_map_get_next_key, 2295 .map_release_uref = htab_map_free_timers_and_wq, 2296 .map_lookup_elem = htab_lru_map_lookup_elem, 2297 .map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem, 2298 .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys, 2299 .map_update_elem = htab_lru_map_update_elem, 2300 .map_delete_elem = htab_lru_map_delete_elem, 2301 .map_gen_lookup = htab_lru_map_gen_lookup, 2302 .map_seq_show_elem = htab_map_seq_show_elem, 2303 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2304 .map_for_each_callback = bpf_for_each_hash_elem, 2305 .map_mem_usage = htab_map_mem_usage, 2306 BATCH_OPS(htab_lru), 2307 .map_btf_id = &htab_map_btf_ids[0], 2308 .iter_seq_info = &iter_seq_info, 2309 }; 2310 2311 /* Called from eBPF program */ 2312 static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key) 2313 { 2314 struct htab_elem *l = __htab_map_lookup_elem(map, key); 2315 2316 if (l) 2317 return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size)); 2318 else 2319 return NULL; 2320 } 2321 2322 /* inline bpf_map_lookup_elem() call for per-CPU hashmap */ 2323 static int htab_percpu_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) 2324 { 2325 struct bpf_insn *insn = insn_buf; 2326 2327 if (!bpf_jit_supports_percpu_insn()) 2328 return -EOPNOTSUPP; 2329 2330 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, 2331 (void *(*)(struct bpf_map *map, void *key))NULL)); 2332 *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); 2333 *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3); 2334 *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2335 offsetof(struct htab_elem, key) + map->key_size); 2336 *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0); 2337 *insn++ = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0); 2338 2339 return insn - insn_buf; 2340 } 2341 2342 static void *htab_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) 2343 { 2344 struct htab_elem *l; 2345 2346 if (cpu >= nr_cpu_ids) 2347 return NULL; 2348 2349 l = __htab_map_lookup_elem(map, key); 2350 if (l) 
2351 return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu); 2352 else 2353 return NULL; 2354 } 2355 2356 static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key) 2357 { 2358 struct htab_elem *l = __htab_map_lookup_elem(map, key); 2359 2360 if (l) { 2361 bpf_lru_node_set_ref(&l->lru_node); 2362 return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size)); 2363 } 2364 2365 return NULL; 2366 } 2367 2368 static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) 2369 { 2370 struct htab_elem *l; 2371 2372 if (cpu >= nr_cpu_ids) 2373 return NULL; 2374 2375 l = __htab_map_lookup_elem(map, key); 2376 if (l) { 2377 bpf_lru_node_set_ref(&l->lru_node); 2378 return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu); 2379 } 2380 2381 return NULL; 2382 } 2383 2384 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value) 2385 { 2386 struct htab_elem *l; 2387 void __percpu *pptr; 2388 int ret = -ENOENT; 2389 int cpu, off = 0; 2390 u32 size; 2391 2392 /* per_cpu areas are zero-filled and bpf programs can only 2393 * access 'value_size' of them, so copying rounded areas 2394 * will not leak any kernel data 2395 */ 2396 size = round_up(map->value_size, 8); 2397 rcu_read_lock(); 2398 l = __htab_map_lookup_elem(map, key); 2399 if (!l) 2400 goto out; 2401 /* We do not mark LRU map element here in order to not mess up 2402 * eviction heuristics when user space does a map walk. 2403 */ 2404 pptr = htab_elem_get_ptr(l, map->key_size); 2405 for_each_possible_cpu(cpu) { 2406 copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu)); 2407 check_and_init_map_value(map, value + off); 2408 off += size; 2409 } 2410 ret = 0; 2411 out: 2412 rcu_read_unlock(); 2413 return ret; 2414 } 2415 2416 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, 2417 u64 map_flags) 2418 { 2419 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 2420 int ret; 2421 2422 rcu_read_lock(); 2423 if (htab_is_lru(htab)) 2424 ret = __htab_lru_percpu_map_update_elem(map, key, value, 2425 map_flags, true); 2426 else 2427 ret = __htab_percpu_map_update_elem(map, key, value, map_flags, 2428 true); 2429 rcu_read_unlock(); 2430 2431 return ret; 2432 } 2433 2434 static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key, 2435 struct seq_file *m) 2436 { 2437 struct htab_elem *l; 2438 void __percpu *pptr; 2439 int cpu; 2440 2441 rcu_read_lock(); 2442 2443 l = __htab_map_lookup_elem(map, key); 2444 if (!l) { 2445 rcu_read_unlock(); 2446 return; 2447 } 2448 2449 btf_type_seq_show(map->btf, map->btf_key_type_id, key, m); 2450 seq_puts(m, ": {\n"); 2451 pptr = htab_elem_get_ptr(l, map->key_size); 2452 for_each_possible_cpu(cpu) { 2453 seq_printf(m, "\tcpu%d: ", cpu); 2454 btf_type_seq_show(map->btf, map->btf_value_type_id, 2455 per_cpu_ptr(pptr, cpu), m); 2456 seq_puts(m, "\n"); 2457 } 2458 seq_puts(m, "}\n"); 2459 2460 rcu_read_unlock(); 2461 } 2462 2463 const struct bpf_map_ops htab_percpu_map_ops = { 2464 .map_meta_equal = bpf_map_meta_equal, 2465 .map_alloc_check = htab_map_alloc_check, 2466 .map_alloc = htab_map_alloc, 2467 .map_free = htab_map_free, 2468 .map_get_next_key = htab_map_get_next_key, 2469 .map_lookup_elem = htab_percpu_map_lookup_elem, 2470 .map_gen_lookup = htab_percpu_map_gen_lookup, 2471 .map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem, 2472 .map_update_elem = htab_percpu_map_update_elem, 2473 .map_delete_elem = htab_map_delete_elem, 2474 .map_lookup_percpu_elem = htab_percpu_map_lookup_percpu_elem, 
2475 .map_seq_show_elem = htab_percpu_map_seq_show_elem, 2476 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2477 .map_for_each_callback = bpf_for_each_hash_elem, 2478 .map_mem_usage = htab_map_mem_usage, 2479 BATCH_OPS(htab_percpu), 2480 .map_btf_id = &htab_map_btf_ids[0], 2481 .iter_seq_info = &iter_seq_info, 2482 }; 2483 2484 const struct bpf_map_ops htab_lru_percpu_map_ops = { 2485 .map_meta_equal = bpf_map_meta_equal, 2486 .map_alloc_check = htab_map_alloc_check, 2487 .map_alloc = htab_map_alloc, 2488 .map_free = htab_map_free, 2489 .map_get_next_key = htab_map_get_next_key, 2490 .map_lookup_elem = htab_lru_percpu_map_lookup_elem, 2491 .map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem, 2492 .map_update_elem = htab_lru_percpu_map_update_elem, 2493 .map_delete_elem = htab_lru_map_delete_elem, 2494 .map_lookup_percpu_elem = htab_lru_percpu_map_lookup_percpu_elem, 2495 .map_seq_show_elem = htab_percpu_map_seq_show_elem, 2496 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2497 .map_for_each_callback = bpf_for_each_hash_elem, 2498 .map_mem_usage = htab_map_mem_usage, 2499 BATCH_OPS(htab_lru_percpu), 2500 .map_btf_id = &htab_map_btf_ids[0], 2501 .iter_seq_info = &iter_seq_info, 2502 }; 2503 2504 static int fd_htab_map_alloc_check(union bpf_attr *attr) 2505 { 2506 if (attr->value_size != sizeof(u32)) 2507 return -EINVAL; 2508 return htab_map_alloc_check(attr); 2509 } 2510 2511 static void fd_htab_map_free(struct bpf_map *map) 2512 { 2513 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 2514 struct hlist_nulls_node *n; 2515 struct hlist_nulls_head *head; 2516 struct htab_elem *l; 2517 int i; 2518 2519 for (i = 0; i < htab->n_buckets; i++) { 2520 head = select_bucket(htab, i); 2521 2522 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { 2523 void *ptr = fd_htab_map_get_ptr(map, l); 2524 2525 map->ops->map_fd_put_ptr(map, ptr, false); 2526 } 2527 } 2528 2529 htab_map_free(map); 2530 } 2531 2532 /* only called from syscall */ 2533 int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value) 2534 { 2535 void **ptr; 2536 int ret = 0; 2537 2538 if (!map->ops->map_fd_sys_lookup_elem) 2539 return -ENOTSUPP; 2540 2541 rcu_read_lock(); 2542 ptr = htab_map_lookup_elem(map, key); 2543 if (ptr) 2544 *value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr)); 2545 else 2546 ret = -ENOENT; 2547 rcu_read_unlock(); 2548 2549 return ret; 2550 } 2551 2552 /* only called from syscall */ 2553 int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, 2554 void *key, void *value, u64 map_flags) 2555 { 2556 void *ptr; 2557 int ret; 2558 u32 ufd = *(u32 *)value; 2559 2560 ptr = map->ops->map_fd_get_ptr(map, map_file, ufd); 2561 if (IS_ERR(ptr)) 2562 return PTR_ERR(ptr); 2563 2564 /* The htab bucket lock is always held during update operations in fd 2565 * htab map, and the following rcu_read_lock() is only used to avoid 2566 * the WARN_ON_ONCE in htab_map_update_elem(). 
2567 */ 2568 rcu_read_lock(); 2569 ret = htab_map_update_elem(map, key, &ptr, map_flags); 2570 rcu_read_unlock(); 2571 if (ret) 2572 map->ops->map_fd_put_ptr(map, ptr, false); 2573 2574 return ret; 2575 } 2576 2577 static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr) 2578 { 2579 struct bpf_map *map, *inner_map_meta; 2580 2581 inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd); 2582 if (IS_ERR(inner_map_meta)) 2583 return inner_map_meta; 2584 2585 map = htab_map_alloc(attr); 2586 if (IS_ERR(map)) { 2587 bpf_map_meta_free(inner_map_meta); 2588 return map; 2589 } 2590 2591 map->inner_map_meta = inner_map_meta; 2592 2593 return map; 2594 } 2595 2596 static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key) 2597 { 2598 struct bpf_map **inner_map = htab_map_lookup_elem(map, key); 2599 2600 if (!inner_map) 2601 return NULL; 2602 2603 return READ_ONCE(*inner_map); 2604 } 2605 2606 static int htab_of_map_gen_lookup(struct bpf_map *map, 2607 struct bpf_insn *insn_buf) 2608 { 2609 struct bpf_insn *insn = insn_buf; 2610 const int ret = BPF_REG_0; 2611 2612 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, 2613 (void *(*)(struct bpf_map *map, void *key))NULL)); 2614 *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); 2615 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2); 2616 *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, 2617 offsetof(struct htab_elem, key) + 2618 round_up(map->key_size, 8)); 2619 *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0); 2620 2621 return insn - insn_buf; 2622 } 2623 2624 static void htab_of_map_free(struct bpf_map *map) 2625 { 2626 bpf_map_meta_free(map->inner_map_meta); 2627 fd_htab_map_free(map); 2628 } 2629 2630 const struct bpf_map_ops htab_of_maps_map_ops = { 2631 .map_alloc_check = fd_htab_map_alloc_check, 2632 .map_alloc = htab_of_map_alloc, 2633 .map_free = htab_of_map_free, 2634 .map_get_next_key = htab_map_get_next_key, 2635 .map_lookup_elem = htab_of_map_lookup_elem, 2636 .map_delete_elem = htab_map_delete_elem, 2637 .map_fd_get_ptr = bpf_map_fd_get_ptr, 2638 .map_fd_put_ptr = bpf_map_fd_put_ptr, 2639 .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, 2640 .map_gen_lookup = htab_of_map_gen_lookup, 2641 .map_check_btf = map_check_no_btf, 2642 .map_mem_usage = htab_map_mem_usage, 2643 BATCH_OPS(htab), 2644 .map_btf_id = &htab_map_btf_ids[0], 2645 }; 2646
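
/* Illustrative user-space sketch, not compiled as part of this file: one way to
 * drive the batch interface implemented by __htab_map_lookup_and_delete_batch()
 * above through libbpf's bpf_map_lookup_and_delete_batch(). The 4-byte key and
 * value layout and the helper name drain_hash_map() are assumptions made for
 * the example; for hash maps the opaque batch cursor is simply the next bucket
 * index, and ENOENT signals that the last bucket has been processed.
 */
#if 0
#include <errno.h>
#include <stdio.h>
#include <bpf/bpf.h>

static void drain_hash_map(int map_fd)
{
	__u32 keys[64], values[64];
	__u32 in_batch, out_batch, count;
	void *in = NULL;			/* NULL on the first call: start from bucket 0 */
	int err;

	LIBBPF_OPTS(bpf_map_batch_opts, opts);	/* elem_flags == 0, flags == 0 */

	do {
		count = 64;			/* in: buffer capacity, out: elements copied */
		err = bpf_map_lookup_and_delete_batch(map_fd, in, &out_batch,
						      keys, values, &count, &opts);
		if (err && errno != ENOENT)
			break;			/* real failure, e.g. EFAULT */

		for (__u32 i = 0; i < count; i++)
			printf("key %u -> value %u\n", keys[i], values[i]);

		in_batch = out_batch;		/* resume from the next bucket */
		in = &in_batch;
	} while (!err);				/* ENOENT: the whole table has been walked */
}
#endif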
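
/* Illustrative BPF-program-side sketch, not compiled as part of this file: the
 * bpf_for_each_map_elem() helper resolves to bpf_for_each_hash_elem() above for
 * hash maps, and the callback return value follows the "0 - continue, 1 - stop"
 * convention noted there. The map "counters", its key/value types, the callback
 * name sum_cb() and the attach point are assumptions made for the example.
 */
#if 0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1024);
	__type(key, __u32);
	__type(value, __u64);
} counters SEC(".maps");

struct cb_ctx {
	__u64 sum;
};

/* Invoked once per element by the for-each walker; return 0 to continue the
 * walk, 1 to stop it early.
 */
static long sum_cb(struct bpf_map *map, __u32 *key, __u64 *val, struct cb_ctx *ctx)
{
	ctx->sum += *val;
	return 0;
}

SEC("tracepoint/syscalls/sys_enter_getpid")
int sum_counters(void *unused)
{
	struct cb_ctx ctx = { .sum = 0 };

	/* flags must be 0; the helper returns the number of elements visited */
	bpf_for_each_map_elem(&counters, sum_cb, &ctx, 0);
	return 0;
}

char _license[] SEC("license") = "GPL";
#endif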
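
/* Illustrative user-space sketch, not compiled as part of this file: reading a
 * single key from a BPF_MAP_TYPE_PERCPU_HASH map. bpf_percpu_hash_copy() above
 * fills one round_up(value_size, 8) slot per possible CPU, so the user buffer
 * must cover libbpf_num_possible_cpus() slots. The 8-byte value size and the
 * helper name dump_percpu_value() are assumptions made for the example.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

static int dump_percpu_value(int map_fd, __u32 key)
{
	int ncpus = libbpf_num_possible_cpus();
	/* value_size is 8 here, already a multiple of 8, so no extra rounding */
	__u64 *values = calloc(ncpus, sizeof(*values));
	int err, cpu;

	if (!values)
		return -1;

	err = bpf_map_lookup_elem(map_fd, &key, values);
	if (!err)
		for (cpu = 0; cpu < ncpus; cpu++)
			printf("cpu%d: %llu\n", cpu, (unsigned long long)values[cpu]);

	free(values);
	return err;
}
#endif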