~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/mm/numa_emulation.c

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /mm/numa_emulation.c (Version linux-6.12-rc7) and /mm/numa_emulation.c (Version linux-5.5.19)


  1 // SPDX-License-Identifier: GPL-2.0                 1 
  2 /*                                                
  3  * NUMA emulation                                 
  4  */                                               
  5 #include <linux/kernel.h>                         
  6 #include <linux/errno.h>                          
  7 #include <linux/topology.h>                       
  8 #include <linux/memblock.h>                       
  9 #include <linux/numa_memblks.h>                   
 10 #include <asm/numa.h>                             
 11                                                   
 12 #define FAKE_NODE_MIN_SIZE      ((u64)32 << 20    
 13 #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_M    
 14                                                   
 15 static int emu_nid_to_phys[MAX_NUMNODES];         
 16 static char *emu_cmdline __initdata;              
 17                                                   
 18 int __init numa_emu_cmdline(char *str)            
 19 {                                                 
 20         emu_cmdline = str;                        
 21         return 0;                                 
 22 }                                                 
 23                                                   
 24 static int __init emu_find_memblk_by_nid(int n    
 25 {                                                 
 26         int i;                                    
 27                                                   
 28         for (i = 0; i < mi->nr_blks; i++)         
 29                 if (mi->blk[i].nid == nid)        
 30                         return i;                 
 31         return -ENOENT;                           
 32 }                                                 
 33                                                   
 34 static u64 __init mem_hole_size(u64 start, u64    
 35 {                                                 
 36         unsigned long start_pfn = PFN_UP(start    
 37         unsigned long end_pfn = PFN_DOWN(end);    
 38                                                   
 39         if (start_pfn < end_pfn)                  
 40                 return PFN_PHYS(absent_pages_i    
 41         return 0;                                 
 42 }                                                 
 43                                                   
 44 /*                                                
 45  * Sets up nid to range from @start to @end.      
 46  * something went wrong, 0 otherwise.             
 47  */                                               
 48 static int __init emu_setup_memblk(struct numa    
 49                                    struct numa    
 50                                    int nid, in    
 51 {                                                 
 52         struct numa_memblk *eb = &ei->blk[ei->    
 53         struct numa_memblk *pb = &pi->blk[phys    
 54                                                   
 55         if (ei->nr_blks >= NR_NODE_MEMBLKS) {     
 56                 pr_err("NUMA: Too many emulate    
 57                 return -EINVAL;                   
 58         }                                         
 59                                                   
 60         ei->nr_blks++;                            
 61         eb->start = pb->start;                    
 62         eb->end = pb->start + size;               
 63         eb->nid = nid;                            
 64                                                   
 65         if (emu_nid_to_phys[nid] == NUMA_NO_NO    
 66                 emu_nid_to_phys[nid] = pb->nid    
 67                                                   
 68         pb->start += size;                        
 69         if (pb->start >= pb->end) {               
 70                 WARN_ON_ONCE(pb->start > pb->e    
 71                 numa_remove_memblk_from(phys_b    
 72         }                                         
 73                                                   
 74         printk(KERN_INFO "Faking node %d at [m    
 75                nid, eb->start, eb->end - 1, (e    
 76         return 0;                                 
 77 }                                                 
 78                                                   
 79 /*                                                
 80  * Sets up nr_nodes fake nodes interleaved ove    
 81  * to max_addr.                                   
 82  *                                                
 83  * Returns zero on success or negative on erro    
 84  */                                               
 85 static int __init split_nodes_interleave(struc    
 86                                          struc    
 87                                          u64 a    
 88 {                                                 
 89         nodemask_t physnode_mask = numa_nodes_    
 90         u64 size;                                 
 91         int big;                                  
 92         int nid = 0;                              
 93         int i, ret;                               
 94                                                   
 95         if (nr_nodes <= 0)                        
 96                 return -1;                        
 97         if (nr_nodes > MAX_NUMNODES) {            
 98                 pr_info("numa=fake=%d too larg    
 99                         nr_nodes, MAX_NUMNODES    
100                 nr_nodes = MAX_NUMNODES;          
101         }                                         
102                                                   
103         /*                                        
104          * Calculate target node size.  x86_32    
105          * the division in ulong number of pag    
106          */                                       
107         size = max_addr - addr - mem_hole_size    
108         size = PFN_PHYS((unsigned long)(size >    
109                                                   
110         /*                                        
111          * Calculate the number of big nodes t    
112          * of consolidating the remainder.        
113          */                                       
114         big = ((size & ~FAKE_NODE_MIN_HASH_MAS    
115                 FAKE_NODE_MIN_SIZE;               
116                                                   
117         size &= FAKE_NODE_MIN_HASH_MASK;          
118         if (!size) {                              
119                 pr_err("Not enough memory for     
120                         "NUMA emulation disabl    
121                 return -1;                        
122         }                                         
123                                                   
124         /*                                        
125          * Continue to fill physical nodes wit    
126          * memory left on any of them.            
127          */                                       
128         while (!nodes_empty(physnode_mask)) {     
129                 for_each_node_mask(i, physnode    
130                         u64 dma32_end = numa_e    
131                         u64 start, limit, end;    
132                         int phys_blk;             
133                                                   
134                         phys_blk = emu_find_me    
135                         if (phys_blk < 0) {       
136                                 node_clear(i,     
137                                 continue;         
138                         }                         
139                         start = pi->blk[phys_b    
140                         limit = pi->blk[phys_b    
141                         end = start + size;       
142                                                   
143                         if (nid < big)            
144                                 end += FAKE_NO    
145                                                   
146                         /*                        
147                          * Continue to add mem    
148                          * non-reserved memory    
149                          */                       
150                         while (end - start - m    
151                                 end += FAKE_NO    
152                                 if (end > limi    
153                                         end =     
154                                         break;    
155                                 }                 
156                         }                         
157                                                   
158                         /*                        
159                          * If there won't be a    
160                          * non-reserved memory    
161                          * this one must exten    
162                          */                       
163                         if (end < dma32_end &&    
164                             mem_hole_size(end,    
165                                 end = dma32_en    
166                                                   
167                         /*                        
168                          * If there won't be e    
169                          * next node, this one    
170                          * physical node.         
171                          */                       
172                         if (limit - end - mem_    
173                                 end = limit;      
174                                                   
175                         ret = emu_setup_memblk    
176                                                   
177                                                   
178                         if (ret < 0)              
179                                 return ret;       
180                 }                                 
181         }                                         
182         return 0;                                 
183 }                                                 
184                                                   
185 /*                                                
186  * Returns the end address of a node so that t    
187  * non-reserved memory or `max_addr' is reache    
188  */                                               
189 static u64 __init find_end_of_node(u64 start,     
190 {                                                 
191         u64 end = start + size;                   
192                                                   
193         while (end - start - mem_hole_size(sta    
194                 end += FAKE_NODE_MIN_SIZE;        
195                 if (end > max_addr) {             
196                         end = max_addr;           
197                         break;                    
198                 }                                 
199         }                                         
200         return end;                               
201 }                                                 
202                                                   
203 static u64 uniform_size(u64 max_addr, u64 base    
204 {                                                 
205         unsigned long max_pfn = PHYS_PFN(max_a    
206         unsigned long base_pfn = PHYS_PFN(base    
207         unsigned long hole_pfns = PHYS_PFN(hol    
208                                                   
209         return PFN_PHYS((max_pfn - base_pfn -     
210 }                                                 
211                                                   
212 /*                                                
213  * Sets up fake nodes of `size' interleaved ov    
214  * `addr' to `max_addr'.                          
215  *                                                
216  * Returns zero on success or negative on erro    
217  */                                               
218 static int __init split_nodes_size_interleave_    
219                                                   
220                                                   
221                                                   
222                                                   
223 {                                                 
224         nodemask_t physnode_mask = numa_nodes_    
225         int i, ret, uniform = 0;                  
226         u64 min_size;                             
227                                                   
228         if ((!size && !nr_nodes) || (nr_nodes     
229                 return -1;                        
230                                                   
231         /*                                        
232          * In the 'uniform' case split the pas    
233          * nr_nodes, in the non-uniform case,     
234          * physical block and try to create no    
235          * @size.                                 
236          *                                        
237          * In the uniform case, split the node    
238          * capacity, i.e. ignore holes. In the    
239          * for holes and treat @size as a mini    
240          */                                       
241         if (!nr_nodes)                            
242                 nr_nodes = MAX_NUMNODES;          
243         else {                                    
244                 nodes_clear(physnode_mask);       
245                 node_set(pblk->nid, physnode_m    
246                 uniform = 1;                      
247         }                                         
248                                                   
249         if (uniform) {                            
250                 min_size = uniform_size(max_ad    
251                 size = min_size;                  
252         } else {                                  
253                 /*                                
254                  * The limit on emulated nodes    
255                  * size per node is increased     
256                  * requested size is too small    
257                  * distribution of node sizes     
258                  * (but not necessarily over p    
259                  */                               
260                 min_size = uniform_size(max_ad    
261                                 mem_hole_size(    
262         }                                         
263         min_size = ALIGN(max(min_size, FAKE_NO    
264         if (size < min_size) {                    
265                 pr_err("Fake node size %LuMB t    
266                         size >> 20, min_size >    
267                 size = min_size;                  
268         }                                         
269         size = ALIGN_DOWN(size, FAKE_NODE_MIN_    
270                                                   
271         /*                                        
272          * Fill physical nodes with fake nodes    
273          * left on any of them.                   
274          */                                       
275         while (!nodes_empty(physnode_mask)) {     
276                 for_each_node_mask(i, physnode    
277                         u64 dma32_end = numa_e    
278                         u64 start, limit, end;    
279                         int phys_blk;             
280                                                   
281                         phys_blk = emu_find_me    
282                         if (phys_blk < 0) {       
283                                 node_clear(i,     
284                                 continue;         
285                         }                         
286                                                   
287                         start = pi->blk[phys_b    
288                         limit = pi->blk[phys_b    
289                                                   
290                         if (uniform)              
291                                 end = start +     
292                         else                      
293                                 end = find_end    
294                         /*                        
295                          * If there won't be a    
296                          * non-reserved memory    
297                          * this one must exten    
298                          */                       
299                         if (end < dma32_end &&    
300                             mem_hole_size(end,    
301                                 end = dma32_en    
302                                                   
303                         /*                        
304                          * If there won't be e    
305                          * next node, this one    
306                          * physical node.         
307                          */                       
308                         if ((limit - end - mem    
309                                         && !un    
310                                 end = limit;      
311                                                   
312                         ret = emu_setup_memblk    
313                                                   
314                                                   
315                         if (ret < 0)              
316                                 return ret;       
317                 }                                 
318         }                                         
319         return nid;                               
320 }                                                 
321                                                   
322 static int __init split_nodes_size_interleave(    
323                                                   
324                                                   
325 {                                                 
326         return split_nodes_size_interleave_uni    
327                         0, NULL, 0);              
328 }                                                 
329                                                   
330 static int __init setup_emu2phys_nid(int *dfl_    
331 {                                                 
332         int i, max_emu_nid = 0;                   
333                                                   
334         *dfl_phys_nid = NUMA_NO_NODE;             
335         for (i = 0; i < ARRAY_SIZE(emu_nid_to_    
336                 if (emu_nid_to_phys[i] != NUMA    
337                         max_emu_nid = i;          
338                         if (*dfl_phys_nid == N    
339                                 *dfl_phys_nid     
340                 }                                 
341         }                                         
342                                                   
343         return max_emu_nid;                       
344 }                                                 
345                                                   
346 /**                                               
347  * numa_emulation - Emulate NUMA nodes            
348  * @numa_meminfo: NUMA configuration to massag    
349  * @numa_dist_cnt: The size of the physical NU    
350  *                                                
351  * Emulate NUMA nodes according to the numa=fa    
352  * @numa_meminfo contains the physical memory     
353  * to reflect the emulated configuration on su    
354  * used to determine the size of the physical     
355  *                                                
356  * On success, the following modifications are    
357  *                                                
358  * - @numa_meminfo is updated to reflect the e    
359  *                                                
360  * - __apicid_to_node[] is updated such that A    
361  *   emulated nodes.                              
362  *                                                
363  * - NUMA distance table is rebuilt to represe    
364  *   nodes.  The distances are determined cons    
365  *   are mapped to physical nodes and match th    
366  *                                                
367  * - emu_nid_to_phys[] reflects how emulated n    
368  *   nodes.  This is used by numa_add_cpu() an    
369  *                                                
370  * If emulation is not enabled or fails, emu_n    
371  * identity mapping and no other modification     
372  */                                               
373 void __init numa_emulation(struct numa_meminfo    
374 {                                                 
375         static struct numa_meminfo ei __initda    
376         static struct numa_meminfo pi __initda    
377         const u64 max_addr = PFN_PHYS(max_pfn)    
378         u8 *phys_dist = NULL;                     
379         size_t phys_size = numa_dist_cnt * num    
380         int max_emu_nid, dfl_phys_nid;            
381         int i, j, ret;                            
382                                                   
383         if (!emu_cmdline)                         
384                 goto no_emu;                      
385                                                   
386         memset(&ei, 0, sizeof(ei));               
387         pi = *numa_meminfo;                       
388                                                   
389         for (i = 0; i < MAX_NUMNODES; i++)        
390                 emu_nid_to_phys[i] = NUMA_NO_N    
391                                                   
392         /*                                        
393          * If the numa=fake command-line conta    
394          * the fixed node size.  Otherwise, if    
395          * split the system RAM into N fake no    
396          */                                       
397         if (strchr(emu_cmdline, 'U')) {           
398                 nodemask_t physnode_mask = num    
399                 unsigned long n;                  
400                 int nid = 0;                      
401                                                   
402                 n = simple_strtoul(emu_cmdline    
403                 ret = -1;                         
404                 for_each_node_mask(i, physnode    
405                         /*                        
406                          * The reason we pass     
407                          * numa_remove_memblk_    
408                          * emu_setup_memblk()     
409                          * and then move every    
410                          * array. Therefore we    
411                          * at blk[0].             
412                          */                       
413                         ret = split_nodes_size    
414                                         pi.blk    
415                                         n, &pi    
416                         if (ret < 0)              
417                                 break;            
418                         if (ret < n) {            
419                                 pr_info("%s: p    
420                                                   
421                                 ret = -1;         
422                                 break;            
423                         }                         
424                         nid = ret;                
425                 }                                 
426         } else if (strchr(emu_cmdline, 'M') ||    
427                 u64 size;                         
428                                                   
429                 size = memparse(emu_cmdline, &    
430                 ret = split_nodes_size_interle    
431         } else {                                  
432                 unsigned long n;                  
433                                                   
434                 n = simple_strtoul(emu_cmdline    
435                 ret = split_nodes_interleave(&    
436         }                                         
437         if (*emu_cmdline == ':')                  
438                 emu_cmdline++;                    
439                                                   
440         if (ret < 0)                              
441                 goto no_emu;                      
442                                                   
443         if (numa_cleanup_meminfo(&ei) < 0) {      
444                 pr_warn("NUMA: Warning: constr    
445                 goto no_emu;                      
446         }                                         
447                                                   
448         /* copy the physical distance table */    
449         if (numa_dist_cnt) {                      
450                 phys_dist = memblock_alloc(phy    
451                 if (!phys_dist) {                 
452                         pr_warn("NUMA: Warning    
453                         goto no_emu;              
454                 }                                 
455                                                   
456                 for (i = 0; i < numa_dist_cnt;    
457                         for (j = 0; j < numa_d    
458                                 phys_dist[i *     
459                                         node_d    
460         }                                         
461                                                   
462         /*                                        
463          * Determine the max emulated nid and     
464          * for unmapped nodes.                    
465          */                                       
466         max_emu_nid = setup_emu2phys_nid(&dfl_    
467                                                   
468         /* commit */                              
469         *numa_meminfo = ei;                       
470                                                   
471         /* Make sure numa_nodes_parsed only co    
472         nodes_clear(numa_nodes_parsed);           
473         for (i = 0; i < ARRAY_SIZE(ei.blk); i+    
474                 if (ei.blk[i].start != ei.blk[    
475                     ei.blk[i].nid != NUMA_NO_N    
476                         node_set(ei.blk[i].nid    
477                                                   
478         numa_emu_update_cpu_to_node(emu_nid_to    
479                                                   
480         /* make sure all emulated nodes are ma    
481         for (i = 0; i < ARRAY_SIZE(emu_nid_to_    
482                 if (emu_nid_to_phys[i] == NUMA    
483                         emu_nid_to_phys[i] = d    
484                                                   
485         /* transform distance table */            
486         numa_reset_distance();                    
487         for (i = 0; i < max_emu_nid + 1; i++)     
488                 for (j = 0; j < max_emu_nid +     
489                         int physi = emu_nid_to    
490                         int physj = emu_nid_to    
491                         int dist;                 
492                                                   
493                         if (get_option(&emu_cm    
494                                 ;                 
495                         else if (physi >= numa    
496                                 dist = physi =    
497                                         LOCAL_    
498                         else                      
499                                 dist = phys_di    
500                                                   
501                         numa_set_distance(i, j    
502                 }                                 
503         }                                         
504                                                   
505         /* free the copied physical distance t    
506         memblock_free(phys_dist, phys_size);      
507         return;                                   
508                                                   
509 no_emu:                                           
510         /* No emulation.  Build identity emu_n    
511         for (i = 0; i < ARRAY_SIZE(emu_nid_to_    
512                 emu_nid_to_phys[i] = i;           
513 }                                                 
514                                                   
515 #ifndef CONFIG_DEBUG_PER_CPU_MAPS                 
516 void numa_add_cpu(unsigned int cpu)               
517 {                                                 
518         int physnid, nid;                         
519                                                   
520         nid = early_cpu_to_node(cpu);             
521         BUG_ON(nid == NUMA_NO_NODE || !node_on    
522                                                   
523         physnid = emu_nid_to_phys[nid];           
524                                                   
525         /*                                        
526          * Map the cpu to each emulated node t    
527          * node of the cpu's apic id.             
528          */                                       
529         for_each_online_node(nid)                 
530                 if (emu_nid_to_phys[nid] == ph    
531                         cpumask_set_cpu(cpu, n    
532 }                                                 
533                                                   
534 void numa_remove_cpu(unsigned int cpu)            
535 {                                                 
536         int i;                                    
537                                                   
538         for_each_online_node(i)                   
539                 cpumask_clear_cpu(cpu, node_to    
540 }                                                 
541 #else   /* !CONFIG_DEBUG_PER_CPU_MAPS */          
542 static void numa_set_cpumask(unsigned int cpu,    
543 {                                                 
544         int nid, physnid;                         
545                                                   
546         nid = early_cpu_to_node(cpu);             
547         if (nid == NUMA_NO_NODE) {                
548                 /* early_cpu_to_node() already    
549                 return;                           
550         }                                         
551                                                   
552         physnid = emu_nid_to_phys[nid];           
553                                                   
554         for_each_online_node(nid) {               
555                 if (emu_nid_to_phys[nid] != ph    
556                         continue;                 
557                                                   
558                 debug_cpumask_set_cpu(cpu, nid    
559         }                                         
560 }                                                 
561                                                   
562 void numa_add_cpu(unsigned int cpu)               
563 {                                                 
564         numa_set_cpumask(cpu, true);              
565 }                                                 
566                                                   
567 void numa_remove_cpu(unsigned int cpu)            
568 {                                                 
569         numa_set_cpumask(cpu, false);             
570 }                                                 
571 #endif  /* !CONFIG_DEBUG_PER_CPU_MAPS */          
572                                                   

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php