1 // SPDX-License-Identifier: GPL-2.0-only 1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 2 /* 3 * linux/mm/vmstat.c 3 * linux/mm/vmstat.c 4 * 4 * 5 * Manages VM statistics 5 * Manages VM statistics 6 * Copyright (C) 1991, 1992, 1993, 1994 Linu 6 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds 7 * 7 * 8 * zoned VM statistics 8 * zoned VM statistics 9 * Copyright (C) 2006 Silicon Graphics, Inc., 9 * Copyright (C) 2006 Silicon Graphics, Inc., 10 * Christoph Lameter <christoph@l 10 * Christoph Lameter <christoph@lameter.com> 11 * Copyright (C) 2008-2014 Christoph Lameter 11 * Copyright (C) 2008-2014 Christoph Lameter 12 */ 12 */ 13 #include <linux/fs.h> 13 #include <linux/fs.h> 14 #include <linux/mm.h> 14 #include <linux/mm.h> 15 #include <linux/err.h> 15 #include <linux/err.h> 16 #include <linux/module.h> 16 #include <linux/module.h> 17 #include <linux/slab.h> 17 #include <linux/slab.h> 18 #include <linux/cpu.h> 18 #include <linux/cpu.h> 19 #include <linux/cpumask.h> 19 #include <linux/cpumask.h> 20 #include <linux/vmstat.h> 20 #include <linux/vmstat.h> 21 #include <linux/proc_fs.h> 21 #include <linux/proc_fs.h> 22 #include <linux/seq_file.h> 22 #include <linux/seq_file.h> 23 #include <linux/debugfs.h> 23 #include <linux/debugfs.h> 24 #include <linux/sched.h> 24 #include <linux/sched.h> 25 #include <linux/math64.h> 25 #include <linux/math64.h> 26 #include <linux/writeback.h> 26 #include <linux/writeback.h> 27 #include <linux/compaction.h> 27 #include <linux/compaction.h> 28 #include <linux/mm_inline.h> 28 #include <linux/mm_inline.h> >> 29 #include <linux/page_ext.h> 29 #include <linux/page_owner.h> 30 #include <linux/page_owner.h> 30 #include <linux/sched/isolation.h> << 31 31 32 #include "internal.h" 32 #include "internal.h" 33 33 34 #ifdef CONFIG_NUMA 34 #ifdef CONFIG_NUMA 35 int sysctl_vm_numa_stat = ENABLE_NUMA_STAT; 35 int sysctl_vm_numa_stat = ENABLE_NUMA_STAT; 36 36 37 /* zero numa counters within a zone */ 37 /* zero numa counters within a zone */ 38 static void zero_zone_numa_counters(struct zon 38 static void zero_zone_numa_counters(struct zone *zone) 39 { 39 { 40 int item, cpu; 40 int item, cpu; 41 41 42 for (item = 0; item < NR_VM_NUMA_EVENT 42 for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) { 43 atomic_long_set(&zone->vm_numa 43 atomic_long_set(&zone->vm_numa_event[item], 0); 44 for_each_online_cpu(cpu) { 44 for_each_online_cpu(cpu) { 45 per_cpu_ptr(zone->per_ 45 per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_event[item] 46 46 = 0; 47 } 47 } 48 } 48 } 49 } 49 } 50 50 51 /* zero numa counters of all the populated zon 51 /* zero numa counters of all the populated zones */ 52 static void zero_zones_numa_counters(void) 52 static void zero_zones_numa_counters(void) 53 { 53 { 54 struct zone *zone; 54 struct zone *zone; 55 55 56 for_each_populated_zone(zone) 56 for_each_populated_zone(zone) 57 zero_zone_numa_counters(zone); 57 zero_zone_numa_counters(zone); 58 } 58 } 59 59 60 /* zero global numa counters */ 60 /* zero global numa counters */ 61 static void zero_global_numa_counters(void) 61 static void zero_global_numa_counters(void) 62 { 62 { 63 int item; 63 int item; 64 64 65 for (item = 0; item < NR_VM_NUMA_EVENT 65 for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) 66 atomic_long_set(&vm_numa_event 66 atomic_long_set(&vm_numa_event[item], 0); 67 } 67 } 68 68 69 static void invalid_numa_statistics(void) 69 static void invalid_numa_statistics(void) 70 { 70 { 71 zero_zones_numa_counters(); 71 zero_zones_numa_counters(); 72 zero_global_numa_counters(); 72 
zero_global_numa_counters(); 73 } 73 } 74 74 75 static DEFINE_MUTEX(vm_numa_stat_lock); 75 static DEFINE_MUTEX(vm_numa_stat_lock); 76 76 77 int sysctl_vm_numa_stat_handler(const struct c !! 77 int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write, 78 void *buffer, size_t *length, 78 void *buffer, size_t *length, loff_t *ppos) 79 { 79 { 80 int ret, oldval; 80 int ret, oldval; 81 81 82 mutex_lock(&vm_numa_stat_lock); 82 mutex_lock(&vm_numa_stat_lock); 83 if (write) 83 if (write) 84 oldval = sysctl_vm_numa_stat; 84 oldval = sysctl_vm_numa_stat; 85 ret = proc_dointvec_minmax(table, writ 85 ret = proc_dointvec_minmax(table, write, buffer, length, ppos); 86 if (ret || !write) 86 if (ret || !write) 87 goto out; 87 goto out; 88 88 89 if (oldval == sysctl_vm_numa_stat) 89 if (oldval == sysctl_vm_numa_stat) 90 goto out; 90 goto out; 91 else if (sysctl_vm_numa_stat == ENABLE 91 else if (sysctl_vm_numa_stat == ENABLE_NUMA_STAT) { 92 static_branch_enable(&vm_numa_ 92 static_branch_enable(&vm_numa_stat_key); 93 pr_info("enable numa statistic 93 pr_info("enable numa statistics\n"); 94 } else { 94 } else { 95 static_branch_disable(&vm_numa 95 static_branch_disable(&vm_numa_stat_key); 96 invalid_numa_statistics(); 96 invalid_numa_statistics(); 97 pr_info("disable numa statisti 97 pr_info("disable numa statistics, and clear numa counters\n"); 98 } 98 } 99 99 100 out: 100 out: 101 mutex_unlock(&vm_numa_stat_lock); 101 mutex_unlock(&vm_numa_stat_lock); 102 return ret; 102 return ret; 103 } 103 } 104 #endif 104 #endif 105 105 106 #ifdef CONFIG_VM_EVENT_COUNTERS 106 #ifdef CONFIG_VM_EVENT_COUNTERS 107 DEFINE_PER_CPU(struct vm_event_state, vm_event 107 DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}}; 108 EXPORT_PER_CPU_SYMBOL(vm_event_states); 108 EXPORT_PER_CPU_SYMBOL(vm_event_states); 109 109 110 static void sum_vm_events(unsigned long *ret) 110 static void sum_vm_events(unsigned long *ret) 111 { 111 { 112 int cpu; 112 int cpu; 113 int i; 113 int i; 114 114 115 memset(ret, 0, NR_VM_EVENT_ITEMS * siz 115 memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long)); 116 116 117 for_each_online_cpu(cpu) { 117 for_each_online_cpu(cpu) { 118 struct vm_event_state *this = 118 struct vm_event_state *this = &per_cpu(vm_event_states, cpu); 119 119 120 for (i = 0; i < NR_VM_EVENT_IT 120 for (i = 0; i < NR_VM_EVENT_ITEMS; i++) 121 ret[i] += this->event[ 121 ret[i] += this->event[i]; 122 } 122 } 123 } 123 } 124 124 125 /* 125 /* 126 * Accumulate the vm event counters across all 126 * Accumulate the vm event counters across all CPUs. 127 * The result is unavoidably approximate - it 127 * The result is unavoidably approximate - it can change 128 * during and after execution of this function 128 * during and after execution of this function. 129 */ 129 */ 130 void all_vm_events(unsigned long *ret) 130 void all_vm_events(unsigned long *ret) 131 { 131 { 132 cpus_read_lock(); 132 cpus_read_lock(); 133 sum_vm_events(ret); 133 sum_vm_events(ret); 134 cpus_read_unlock(); 134 cpus_read_unlock(); 135 } 135 } 136 EXPORT_SYMBOL_GPL(all_vm_events); 136 EXPORT_SYMBOL_GPL(all_vm_events); 137 137 138 /* 138 /* 139 * Fold the foreign cpu events into our own. 139 * Fold the foreign cpu events into our own. 140 * 140 * 141 * This is adding to the events on one process 141 * This is adding to the events on one processor 142 * but keeps the global counts constant. 142 * but keeps the global counts constant. 
143 */ 143 */ 144 void vm_events_fold_cpu(int cpu) 144 void vm_events_fold_cpu(int cpu) 145 { 145 { 146 struct vm_event_state *fold_state = &p 146 struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu); 147 int i; 147 int i; 148 148 149 for (i = 0; i < NR_VM_EVENT_ITEMS; i++ 149 for (i = 0; i < NR_VM_EVENT_ITEMS; i++) { 150 count_vm_events(i, fold_state- 150 count_vm_events(i, fold_state->event[i]); 151 fold_state->event[i] = 0; 151 fold_state->event[i] = 0; 152 } 152 } 153 } 153 } 154 154 155 #endif /* CONFIG_VM_EVENT_COUNTERS */ 155 #endif /* CONFIG_VM_EVENT_COUNTERS */ 156 156 157 /* 157 /* 158 * Manage combined zone based / global counter 158 * Manage combined zone based / global counters 159 * 159 * 160 * vm_stat contains the global counters 160 * vm_stat contains the global counters 161 */ 161 */ 162 atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITE 162 atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp; 163 atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITE 163 atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp; 164 atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_I 164 atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS] __cacheline_aligned_in_smp; 165 EXPORT_SYMBOL(vm_zone_stat); 165 EXPORT_SYMBOL(vm_zone_stat); 166 EXPORT_SYMBOL(vm_node_stat); 166 EXPORT_SYMBOL(vm_node_stat); 167 167 168 #ifdef CONFIG_NUMA << 169 static void fold_vm_zone_numa_events(struct zo << 170 { << 171 unsigned long zone_numa_events[NR_VM_N << 172 int cpu; << 173 enum numa_stat_item item; << 174 << 175 for_each_online_cpu(cpu) { << 176 struct per_cpu_zonestat *pzsta << 177 << 178 pzstats = per_cpu_ptr(zone->pe << 179 for (item = 0; item < NR_VM_NU << 180 zone_numa_events[item] << 181 } << 182 << 183 for (item = 0; item < NR_VM_NUMA_EVENT << 184 zone_numa_event_add(zone_numa_ << 185 } << 186 << 187 void fold_vm_numa_events(void) << 188 { << 189 struct zone *zone; << 190 << 191 for_each_populated_zone(zone) << 192 fold_vm_zone_numa_events(zone) << 193 } << 194 #endif << 195 << 196 #ifdef CONFIG_SMP 168 #ifdef CONFIG_SMP 197 169 198 int calculate_pressure_threshold(struct zone * 170 int calculate_pressure_threshold(struct zone *zone) 199 { 171 { 200 int threshold; 172 int threshold; 201 int watermark_distance; 173 int watermark_distance; 202 174 203 /* 175 /* 204 * As vmstats are not up to date, ther 176 * As vmstats are not up to date, there is drift between the estimated 205 * and real values. For high threshold 177 * and real values. For high thresholds and a high number of CPUs, it 206 * is possible for the min watermark t 178 * is possible for the min watermark to be breached while the estimated 207 * value looks fine. The pressure thre 179 * value looks fine. 
The pressure threshold is a reduced value such 208 * that even the maximum amount of dri 180 * that even the maximum amount of drift will not accidentally breach 209 * the min watermark 181 * the min watermark 210 */ 182 */ 211 watermark_distance = low_wmark_pages(z 183 watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone); 212 threshold = max(1, (int)(watermark_dis 184 threshold = max(1, (int)(watermark_distance / num_online_cpus())); 213 185 214 /* 186 /* 215 * Maximum threshold is 125 187 * Maximum threshold is 125 216 */ 188 */ 217 threshold = min(125, threshold); 189 threshold = min(125, threshold); 218 190 219 return threshold; 191 return threshold; 220 } 192 } 221 193 222 int calculate_normal_threshold(struct zone *zo 194 int calculate_normal_threshold(struct zone *zone) 223 { 195 { 224 int threshold; 196 int threshold; 225 int mem; /* memory in 128 MB un 197 int mem; /* memory in 128 MB units */ 226 198 227 /* 199 /* 228 * The threshold scales with the numbe 200 * The threshold scales with the number of processors and the amount 229 * of memory per zone. More memory mea 201 * of memory per zone. More memory means that we can defer updates for 230 * longer, more processors could lead 202 * longer, more processors could lead to more contention. 231 * fls() is used to have a cheap way o 203 * fls() is used to have a cheap way of logarithmic scaling. 232 * 204 * 233 * Some sample thresholds: 205 * Some sample thresholds: 234 * 206 * 235 * Threshold Processors (fls) 207 * Threshold Processors (fls) Zonesize fls(mem)+1 236 * ----------------------------------- 208 * ------------------------------------------------------------------ 237 * 8 1 1 209 * 8 1 1 0.9-1 GB 4 238 * 16 2 2 210 * 16 2 2 0.9-1 GB 4 239 * 20 2 2 211 * 20 2 2 1-2 GB 5 240 * 24 2 2 212 * 24 2 2 2-4 GB 6 241 * 28 2 2 213 * 28 2 2 4-8 GB 7 242 * 32 2 2 214 * 32 2 2 8-16 GB 8 243 * 4 2 2 215 * 4 2 2 <128M 1 244 * 30 4 3 216 * 30 4 3 2-4 GB 5 245 * 48 4 3 217 * 48 4 3 8-16 GB 8 246 * 32 8 4 218 * 32 8 4 1-2 GB 4 247 * 32 8 4 219 * 32 8 4 0.9-1GB 4 248 * 10 16 5 220 * 10 16 5 <128M 1 249 * 40 16 5 221 * 40 16 5 900M 4 250 * 70 64 7 222 * 70 64 7 2-4 GB 5 251 * 84 64 7 223 * 84 64 7 4-8 GB 6 252 * 108 512 9 224 * 108 512 9 4-8 GB 6 253 * 125 1024 10 225 * 125 1024 10 8-16 GB 8 254 * 125 1024 10 226 * 125 1024 10 16-32 GB 9 255 */ 227 */ 256 228 257 mem = zone_managed_pages(zone) >> (27 229 mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT); 258 230 259 threshold = 2 * fls(num_online_cpus()) 231 threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem)); 260 232 261 /* 233 /* 262 * Maximum threshold is 125 234 * Maximum threshold is 125 263 */ 235 */ 264 threshold = min(125, threshold); 236 threshold = min(125, threshold); 265 237 266 return threshold; 238 return threshold; 267 } 239 } 268 240 269 /* 241 /* 270 * Refresh the thresholds for each zone. 242 * Refresh the thresholds for each zone. 
271 */ 243 */ 272 void refresh_zone_stat_thresholds(void) 244 void refresh_zone_stat_thresholds(void) 273 { 245 { 274 struct pglist_data *pgdat; 246 struct pglist_data *pgdat; 275 struct zone *zone; 247 struct zone *zone; 276 int cpu; 248 int cpu; 277 int threshold; 249 int threshold; 278 250 279 /* Zero current pgdat thresholds */ 251 /* Zero current pgdat thresholds */ 280 for_each_online_pgdat(pgdat) { 252 for_each_online_pgdat(pgdat) { 281 for_each_online_cpu(cpu) { 253 for_each_online_cpu(cpu) { 282 per_cpu_ptr(pgdat->per 254 per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold = 0; 283 } 255 } 284 } 256 } 285 257 286 for_each_populated_zone(zone) { 258 for_each_populated_zone(zone) { 287 struct pglist_data *pgdat = zo 259 struct pglist_data *pgdat = zone->zone_pgdat; 288 unsigned long max_drift, toler 260 unsigned long max_drift, tolerate_drift; 289 261 290 threshold = calculate_normal_t 262 threshold = calculate_normal_threshold(zone); 291 263 292 for_each_online_cpu(cpu) { 264 for_each_online_cpu(cpu) { 293 int pgdat_threshold; 265 int pgdat_threshold; 294 266 295 per_cpu_ptr(zone->per_ 267 per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold 296 268 = threshold; 297 269 298 /* Base nodestat thres 270 /* Base nodestat threshold on the largest populated zone. */ 299 pgdat_threshold = per_ 271 pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold; 300 per_cpu_ptr(pgdat->per 272 per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold 301 = max(threshol 273 = max(threshold, pgdat_threshold); 302 } 274 } 303 275 304 /* 276 /* 305 * Only set percpu_drift_mark 277 * Only set percpu_drift_mark if there is a danger that 306 * NR_FREE_PAGES reports the l 278 * NR_FREE_PAGES reports the low watermark is ok when in fact 307 * the min watermark could be 279 * the min watermark could be breached by an allocation 308 */ 280 */ 309 tolerate_drift = low_wmark_pag 281 tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone); 310 max_drift = num_online_cpus() 282 max_drift = num_online_cpus() * threshold; 311 if (max_drift > tolerate_drift 283 if (max_drift > tolerate_drift) 312 zone->percpu_drift_mar 284 zone->percpu_drift_mark = high_wmark_pages(zone) + 313 max_dr 285 max_drift; 314 } 286 } 315 } 287 } 316 288 317 void set_pgdat_percpu_threshold(pg_data_t *pgd 289 void set_pgdat_percpu_threshold(pg_data_t *pgdat, 318 int (*calculat 290 int (*calculate_pressure)(struct zone *)) 319 { 291 { 320 struct zone *zone; 292 struct zone *zone; 321 int cpu; 293 int cpu; 322 int threshold; 294 int threshold; 323 int i; 295 int i; 324 296 325 for (i = 0; i < pgdat->nr_zones; i++) 297 for (i = 0; i < pgdat->nr_zones; i++) { 326 zone = &pgdat->node_zones[i]; 298 zone = &pgdat->node_zones[i]; 327 if (!zone->percpu_drift_mark) 299 if (!zone->percpu_drift_mark) 328 continue; 300 continue; 329 301 330 threshold = (*calculate_pressu 302 threshold = (*calculate_pressure)(zone); 331 for_each_online_cpu(cpu) 303 for_each_online_cpu(cpu) 332 per_cpu_ptr(zone->per_ 304 per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold 333 305 = threshold; 334 } 306 } 335 } 307 } 336 308 337 /* 309 /* 338 * For use when we know that interrupts are di 310 * For use when we know that interrupts are disabled, 339 * or when we know that preemption is disabled 311 * or when we know that preemption is disabled and that 340 * particular counter cannot be updated from i 312 * particular counter cannot be updated from interrupt context. 
341 */ 313 */ 342 void __mod_zone_page_state(struct zone *zone, 314 void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, 343 long delta) 315 long delta) 344 { 316 { 345 struct per_cpu_zonestat __percpu *pcp 317 struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats; 346 s8 __percpu *p = pcp->vm_stat_diff + i 318 s8 __percpu *p = pcp->vm_stat_diff + item; 347 long x; 319 long x; 348 long t; 320 long t; 349 321 350 /* 322 /* 351 * Accurate vmstat updates require a R 323 * Accurate vmstat updates require a RMW. On !PREEMPT_RT kernels, 352 * atomicity is provided by IRQs being 324 * atomicity is provided by IRQs being disabled -- either explicitly 353 * or via local_lock_irq. On PREEMPT_R 325 * or via local_lock_irq. On PREEMPT_RT, local_lock_irq only disables 354 * CPU migrations and preemption poten 326 * CPU migrations and preemption potentially corrupts a counter so 355 * disable preemption. 327 * disable preemption. 356 */ 328 */ 357 preempt_disable_nested(); !! 329 if (IS_ENABLED(CONFIG_PREEMPT_RT)) >> 330 preempt_disable(); 358 331 359 x = delta + __this_cpu_read(*p); 332 x = delta + __this_cpu_read(*p); 360 333 361 t = __this_cpu_read(pcp->stat_threshol 334 t = __this_cpu_read(pcp->stat_threshold); 362 335 363 if (unlikely(abs(x) > t)) { 336 if (unlikely(abs(x) > t)) { 364 zone_page_state_add(x, zone, i 337 zone_page_state_add(x, zone, item); 365 x = 0; 338 x = 0; 366 } 339 } 367 __this_cpu_write(*p, x); 340 __this_cpu_write(*p, x); 368 341 369 preempt_enable_nested(); !! 342 if (IS_ENABLED(CONFIG_PREEMPT_RT)) >> 343 preempt_enable(); 370 } 344 } 371 EXPORT_SYMBOL(__mod_zone_page_state); 345 EXPORT_SYMBOL(__mod_zone_page_state); 372 346 373 void __mod_node_page_state(struct pglist_data 347 void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, 374 long delta) 348 long delta) 375 { 349 { 376 struct per_cpu_nodestat __percpu *pcp 350 struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats; 377 s8 __percpu *p = pcp->vm_node_stat_dif 351 s8 __percpu *p = pcp->vm_node_stat_diff + item; 378 long x; 352 long x; 379 long t; 353 long t; 380 354 381 if (vmstat_item_in_bytes(item)) { 355 if (vmstat_item_in_bytes(item)) { 382 /* 356 /* 383 * Only cgroups use subpage ac 357 * Only cgroups use subpage accounting right now; at 384 * the global level, these ite 358 * the global level, these items still change in 385 * multiples of whole pages. S 359 * multiples of whole pages. Store them as pages 386 * internally to keep the per- 360 * internally to keep the per-cpu counters compact. 387 */ 361 */ 388 VM_WARN_ON_ONCE(delta & (PAGE_ 362 VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1)); 389 delta >>= PAGE_SHIFT; 363 delta >>= PAGE_SHIFT; 390 } 364 } 391 365 392 /* See __mod_node_page_state */ 366 /* See __mod_node_page_state */ 393 preempt_disable_nested(); !! 367 if (IS_ENABLED(CONFIG_PREEMPT_RT)) >> 368 preempt_disable(); 394 369 395 x = delta + __this_cpu_read(*p); 370 x = delta + __this_cpu_read(*p); 396 371 397 t = __this_cpu_read(pcp->stat_threshol 372 t = __this_cpu_read(pcp->stat_threshold); 398 373 399 if (unlikely(abs(x) > t)) { 374 if (unlikely(abs(x) > t)) { 400 node_page_state_add(x, pgdat, 375 node_page_state_add(x, pgdat, item); 401 x = 0; 376 x = 0; 402 } 377 } 403 __this_cpu_write(*p, x); 378 __this_cpu_write(*p, x); 404 379 405 preempt_enable_nested(); !! 
380 if (IS_ENABLED(CONFIG_PREEMPT_RT)) >> 381 preempt_enable(); 406 } 382 } 407 EXPORT_SYMBOL(__mod_node_page_state); 383 EXPORT_SYMBOL(__mod_node_page_state); 408 384 409 /* 385 /* 410 * Optimized increment and decrement functions 386 * Optimized increment and decrement functions. 411 * 387 * 412 * These are only for a single page and theref 388 * These are only for a single page and therefore can take a struct page * 413 * argument instead of struct zone *. This all 389 * argument instead of struct zone *. This allows the inclusion of the code 414 * generated for page_zone(page) into the opti 390 * generated for page_zone(page) into the optimized functions. 415 * 391 * 416 * No overflow check is necessary and therefor 392 * No overflow check is necessary and therefore the differential can be 417 * incremented or decremented in place which m 393 * incremented or decremented in place which may allow the compilers to 418 * generate better code. 394 * generate better code. 419 * The increment or decrement is known and the 395 * The increment or decrement is known and therefore one boundary check can 420 * be omitted. 396 * be omitted. 421 * 397 * 422 * NOTE: These functions are very performance 398 * NOTE: These functions are very performance sensitive. Change only 423 * with care. 399 * with care. 424 * 400 * 425 * Some processors have inc/dec instructions t 401 * Some processors have inc/dec instructions that are atomic vs an interrupt. 426 * However, the code must first determine the 402 * However, the code must first determine the differential location in a zone 427 * based on the processor number and then inc/ 403 * based on the processor number and then inc/dec the counter. There is no 428 * guarantee without disabling preemption that 404 * guarantee without disabling preemption that the processor will not change 429 * in between and therefore the atomicity vs. 405 * in between and therefore the atomicity vs. interrupt cannot be exploited 430 * in a useful way here. 406 * in a useful way here. 431 */ 407 */ 432 void __inc_zone_state(struct zone *zone, enum 408 void __inc_zone_state(struct zone *zone, enum zone_stat_item item) 433 { 409 { 434 struct per_cpu_zonestat __percpu *pcp 410 struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats; 435 s8 __percpu *p = pcp->vm_stat_diff + i 411 s8 __percpu *p = pcp->vm_stat_diff + item; 436 s8 v, t; 412 s8 v, t; 437 413 438 /* See __mod_node_page_state */ 414 /* See __mod_node_page_state */ 439 preempt_disable_nested(); !! 415 if (IS_ENABLED(CONFIG_PREEMPT_RT)) >> 416 preempt_disable(); 440 417 441 v = __this_cpu_inc_return(*p); 418 v = __this_cpu_inc_return(*p); 442 t = __this_cpu_read(pcp->stat_threshol 419 t = __this_cpu_read(pcp->stat_threshold); 443 if (unlikely(v > t)) { 420 if (unlikely(v > t)) { 444 s8 overstep = t >> 1; 421 s8 overstep = t >> 1; 445 422 446 zone_page_state_add(v + overst 423 zone_page_state_add(v + overstep, zone, item); 447 __this_cpu_write(*p, -overstep 424 __this_cpu_write(*p, -overstep); 448 } 425 } 449 426 450 preempt_enable_nested(); !! 
427 if (IS_ENABLED(CONFIG_PREEMPT_RT)) >> 428 preempt_enable(); 451 } 429 } 452 430 453 void __inc_node_state(struct pglist_data *pgda 431 void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) 454 { 432 { 455 struct per_cpu_nodestat __percpu *pcp 433 struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats; 456 s8 __percpu *p = pcp->vm_node_stat_dif 434 s8 __percpu *p = pcp->vm_node_stat_diff + item; 457 s8 v, t; 435 s8 v, t; 458 436 459 VM_WARN_ON_ONCE(vmstat_item_in_bytes(i 437 VM_WARN_ON_ONCE(vmstat_item_in_bytes(item)); 460 438 461 /* See __mod_node_page_state */ 439 /* See __mod_node_page_state */ 462 preempt_disable_nested(); !! 440 if (IS_ENABLED(CONFIG_PREEMPT_RT)) >> 441 preempt_disable(); 463 442 464 v = __this_cpu_inc_return(*p); 443 v = __this_cpu_inc_return(*p); 465 t = __this_cpu_read(pcp->stat_threshol 444 t = __this_cpu_read(pcp->stat_threshold); 466 if (unlikely(v > t)) { 445 if (unlikely(v > t)) { 467 s8 overstep = t >> 1; 446 s8 overstep = t >> 1; 468 447 469 node_page_state_add(v + overst 448 node_page_state_add(v + overstep, pgdat, item); 470 __this_cpu_write(*p, -overstep 449 __this_cpu_write(*p, -overstep); 471 } 450 } 472 451 473 preempt_enable_nested(); !! 452 if (IS_ENABLED(CONFIG_PREEMPT_RT)) >> 453 preempt_enable(); 474 } 454 } 475 455 476 void __inc_zone_page_state(struct page *page, 456 void __inc_zone_page_state(struct page *page, enum zone_stat_item item) 477 { 457 { 478 __inc_zone_state(page_zone(page), item 458 __inc_zone_state(page_zone(page), item); 479 } 459 } 480 EXPORT_SYMBOL(__inc_zone_page_state); 460 EXPORT_SYMBOL(__inc_zone_page_state); 481 461 482 void __inc_node_page_state(struct page *page, 462 void __inc_node_page_state(struct page *page, enum node_stat_item item) 483 { 463 { 484 __inc_node_state(page_pgdat(page), ite 464 __inc_node_state(page_pgdat(page), item); 485 } 465 } 486 EXPORT_SYMBOL(__inc_node_page_state); 466 EXPORT_SYMBOL(__inc_node_page_state); 487 467 488 void __dec_zone_state(struct zone *zone, enum 468 void __dec_zone_state(struct zone *zone, enum zone_stat_item item) 489 { 469 { 490 struct per_cpu_zonestat __percpu *pcp 470 struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats; 491 s8 __percpu *p = pcp->vm_stat_diff + i 471 s8 __percpu *p = pcp->vm_stat_diff + item; 492 s8 v, t; 472 s8 v, t; 493 473 494 /* See __mod_node_page_state */ 474 /* See __mod_node_page_state */ 495 preempt_disable_nested(); !! 475 if (IS_ENABLED(CONFIG_PREEMPT_RT)) >> 476 preempt_disable(); 496 477 497 v = __this_cpu_dec_return(*p); 478 v = __this_cpu_dec_return(*p); 498 t = __this_cpu_read(pcp->stat_threshol 479 t = __this_cpu_read(pcp->stat_threshold); 499 if (unlikely(v < - t)) { 480 if (unlikely(v < - t)) { 500 s8 overstep = t >> 1; 481 s8 overstep = t >> 1; 501 482 502 zone_page_state_add(v - overst 483 zone_page_state_add(v - overstep, zone, item); 503 __this_cpu_write(*p, overstep) 484 __this_cpu_write(*p, overstep); 504 } 485 } 505 486 506 preempt_enable_nested(); !! 
487 if (IS_ENABLED(CONFIG_PREEMPT_RT)) >> 488 preempt_enable(); 507 } 489 } 508 490 509 void __dec_node_state(struct pglist_data *pgda 491 void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) 510 { 492 { 511 struct per_cpu_nodestat __percpu *pcp 493 struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats; 512 s8 __percpu *p = pcp->vm_node_stat_dif 494 s8 __percpu *p = pcp->vm_node_stat_diff + item; 513 s8 v, t; 495 s8 v, t; 514 496 515 VM_WARN_ON_ONCE(vmstat_item_in_bytes(i 497 VM_WARN_ON_ONCE(vmstat_item_in_bytes(item)); 516 498 517 /* See __mod_node_page_state */ 499 /* See __mod_node_page_state */ 518 preempt_disable_nested(); !! 500 if (IS_ENABLED(CONFIG_PREEMPT_RT)) >> 501 preempt_disable(); 519 502 520 v = __this_cpu_dec_return(*p); 503 v = __this_cpu_dec_return(*p); 521 t = __this_cpu_read(pcp->stat_threshol 504 t = __this_cpu_read(pcp->stat_threshold); 522 if (unlikely(v < - t)) { 505 if (unlikely(v < - t)) { 523 s8 overstep = t >> 1; 506 s8 overstep = t >> 1; 524 507 525 node_page_state_add(v - overst 508 node_page_state_add(v - overstep, pgdat, item); 526 __this_cpu_write(*p, overstep) 509 __this_cpu_write(*p, overstep); 527 } 510 } 528 511 529 preempt_enable_nested(); !! 512 if (IS_ENABLED(CONFIG_PREEMPT_RT)) >> 513 preempt_enable(); 530 } 514 } 531 515 532 void __dec_zone_page_state(struct page *page, 516 void __dec_zone_page_state(struct page *page, enum zone_stat_item item) 533 { 517 { 534 __dec_zone_state(page_zone(page), item 518 __dec_zone_state(page_zone(page), item); 535 } 519 } 536 EXPORT_SYMBOL(__dec_zone_page_state); 520 EXPORT_SYMBOL(__dec_zone_page_state); 537 521 538 void __dec_node_page_state(struct page *page, 522 void __dec_node_page_state(struct page *page, enum node_stat_item item) 539 { 523 { 540 __dec_node_state(page_pgdat(page), ite 524 __dec_node_state(page_pgdat(page), item); 541 } 525 } 542 EXPORT_SYMBOL(__dec_node_page_state); 526 EXPORT_SYMBOL(__dec_node_page_state); 543 527 544 #ifdef CONFIG_HAVE_CMPXCHG_LOCAL 528 #ifdef CONFIG_HAVE_CMPXCHG_LOCAL 545 /* 529 /* 546 * If we have cmpxchg_local support then we do 530 * If we have cmpxchg_local support then we do not need to incur the overhead 547 * that comes with local_irq_save/restore if w 531 * that comes with local_irq_save/restore if we use this_cpu_cmpxchg. 548 * 532 * 549 * mod_state() modifies the zone counter state 533 * mod_state() modifies the zone counter state through atomic per cpu 550 * operations. 534 * operations. 551 * 535 * 552 * Overstep mode specifies how overstep should 536 * Overstep mode specifies how overstep should handled: 553 * 0 No overstepping 537 * 0 No overstepping 554 * 1 Overstepping half of threshold 538 * 1 Overstepping half of threshold 555 * -1 Overstepping minus half of thre 539 * -1 Overstepping minus half of threshold 556 */ 540 */ 557 static inline void mod_zone_state(struct zone 541 static inline void mod_zone_state(struct zone *zone, 558 enum zone_stat_item item, long delta, i 542 enum zone_stat_item item, long delta, int overstep_mode) 559 { 543 { 560 struct per_cpu_zonestat __percpu *pcp 544 struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats; 561 s8 __percpu *p = pcp->vm_stat_diff + i 545 s8 __percpu *p = pcp->vm_stat_diff + item; 562 long n, t, z; !! 
546 long o, n, t, z; 563 s8 o; << 564 547 565 o = this_cpu_read(*p); << 566 do { 548 do { 567 z = 0; /* overflow to zone co 549 z = 0; /* overflow to zone counters */ 568 550 569 /* 551 /* 570 * The fetching of the stat_th 552 * The fetching of the stat_threshold is racy. We may apply 571 * a counter threshold to the 553 * a counter threshold to the wrong the cpu if we get 572 * rescheduled while executing 554 * rescheduled while executing here. However, the next 573 * counter update will apply t 555 * counter update will apply the threshold again and 574 * therefore bring the counter 556 * therefore bring the counter under the threshold again. 575 * 557 * 576 * Most of the time the thresh 558 * Most of the time the thresholds are the same anyways 577 * for all cpus in a zone. 559 * for all cpus in a zone. 578 */ 560 */ 579 t = this_cpu_read(pcp->stat_th 561 t = this_cpu_read(pcp->stat_threshold); 580 562 581 n = delta + (long)o; !! 563 o = this_cpu_read(*p); >> 564 n = delta + o; 582 565 583 if (abs(n) > t) { 566 if (abs(n) > t) { 584 int os = overstep_mode 567 int os = overstep_mode * (t >> 1) ; 585 568 586 /* Overflow must be ad 569 /* Overflow must be added to zone counters */ 587 z = n + os; 570 z = n + os; 588 n = -os; 571 n = -os; 589 } 572 } 590 } while (!this_cpu_try_cmpxchg(*p, &o, !! 573 } while (this_cpu_cmpxchg(*p, o, n) != o); 591 574 592 if (z) 575 if (z) 593 zone_page_state_add(z, zone, i 576 zone_page_state_add(z, zone, item); 594 } 577 } 595 578 596 void mod_zone_page_state(struct zone *zone, en 579 void mod_zone_page_state(struct zone *zone, enum zone_stat_item item, 597 long delta) 580 long delta) 598 { 581 { 599 mod_zone_state(zone, item, delta, 0); 582 mod_zone_state(zone, item, delta, 0); 600 } 583 } 601 EXPORT_SYMBOL(mod_zone_page_state); 584 EXPORT_SYMBOL(mod_zone_page_state); 602 585 603 void inc_zone_page_state(struct page *page, en 586 void inc_zone_page_state(struct page *page, enum zone_stat_item item) 604 { 587 { 605 mod_zone_state(page_zone(page), item, 588 mod_zone_state(page_zone(page), item, 1, 1); 606 } 589 } 607 EXPORT_SYMBOL(inc_zone_page_state); 590 EXPORT_SYMBOL(inc_zone_page_state); 608 591 609 void dec_zone_page_state(struct page *page, en 592 void dec_zone_page_state(struct page *page, enum zone_stat_item item) 610 { 593 { 611 mod_zone_state(page_zone(page), item, 594 mod_zone_state(page_zone(page), item, -1, -1); 612 } 595 } 613 EXPORT_SYMBOL(dec_zone_page_state); 596 EXPORT_SYMBOL(dec_zone_page_state); 614 597 615 static inline void mod_node_state(struct pglis 598 static inline void mod_node_state(struct pglist_data *pgdat, 616 enum node_stat_item item, int delta, in 599 enum node_stat_item item, int delta, int overstep_mode) 617 { 600 { 618 struct per_cpu_nodestat __percpu *pcp 601 struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats; 619 s8 __percpu *p = pcp->vm_node_stat_dif 602 s8 __percpu *p = pcp->vm_node_stat_diff + item; 620 long n, t, z; !! 603 long o, n, t, z; 621 s8 o; << 622 604 623 if (vmstat_item_in_bytes(item)) { 605 if (vmstat_item_in_bytes(item)) { 624 /* 606 /* 625 * Only cgroups use subpage ac 607 * Only cgroups use subpage accounting right now; at 626 * the global level, these ite 608 * the global level, these items still change in 627 * multiples of whole pages. S 609 * multiples of whole pages. Store them as pages 628 * internally to keep the per- 610 * internally to keep the per-cpu counters compact. 
629 */ 611 */ 630 VM_WARN_ON_ONCE(delta & (PAGE_ 612 VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1)); 631 delta >>= PAGE_SHIFT; 613 delta >>= PAGE_SHIFT; 632 } 614 } 633 615 634 o = this_cpu_read(*p); << 635 do { 616 do { 636 z = 0; /* overflow to node co 617 z = 0; /* overflow to node counters */ 637 618 638 /* 619 /* 639 * The fetching of the stat_th 620 * The fetching of the stat_threshold is racy. We may apply 640 * a counter threshold to the 621 * a counter threshold to the wrong the cpu if we get 641 * rescheduled while executing 622 * rescheduled while executing here. However, the next 642 * counter update will apply t 623 * counter update will apply the threshold again and 643 * therefore bring the counter 624 * therefore bring the counter under the threshold again. 644 * 625 * 645 * Most of the time the thresh 626 * Most of the time the thresholds are the same anyways 646 * for all cpus in a node. 627 * for all cpus in a node. 647 */ 628 */ 648 t = this_cpu_read(pcp->stat_th 629 t = this_cpu_read(pcp->stat_threshold); 649 630 650 n = delta + (long)o; !! 631 o = this_cpu_read(*p); >> 632 n = delta + o; 651 633 652 if (abs(n) > t) { 634 if (abs(n) > t) { 653 int os = overstep_mode 635 int os = overstep_mode * (t >> 1) ; 654 636 655 /* Overflow must be ad 637 /* Overflow must be added to node counters */ 656 z = n + os; 638 z = n + os; 657 n = -os; 639 n = -os; 658 } 640 } 659 } while (!this_cpu_try_cmpxchg(*p, &o, !! 641 } while (this_cpu_cmpxchg(*p, o, n) != o); 660 642 661 if (z) 643 if (z) 662 node_page_state_add(z, pgdat, 644 node_page_state_add(z, pgdat, item); 663 } 645 } 664 646 665 void mod_node_page_state(struct pglist_data *p 647 void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, 666 long d 648 long delta) 667 { 649 { 668 mod_node_state(pgdat, item, delta, 0); 650 mod_node_state(pgdat, item, delta, 0); 669 } 651 } 670 EXPORT_SYMBOL(mod_node_page_state); 652 EXPORT_SYMBOL(mod_node_page_state); 671 653 672 void inc_node_state(struct pglist_data *pgdat, 654 void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) 673 { 655 { 674 mod_node_state(pgdat, item, 1, 1); 656 mod_node_state(pgdat, item, 1, 1); 675 } 657 } 676 658 677 void inc_node_page_state(struct page *page, en 659 void inc_node_page_state(struct page *page, enum node_stat_item item) 678 { 660 { 679 mod_node_state(page_pgdat(page), item, 661 mod_node_state(page_pgdat(page), item, 1, 1); 680 } 662 } 681 EXPORT_SYMBOL(inc_node_page_state); 663 EXPORT_SYMBOL(inc_node_page_state); 682 664 683 void dec_node_page_state(struct page *page, en 665 void dec_node_page_state(struct page *page, enum node_stat_item item) 684 { 666 { 685 mod_node_state(page_pgdat(page), item, 667 mod_node_state(page_pgdat(page), item, -1, -1); 686 } 668 } 687 EXPORT_SYMBOL(dec_node_page_state); 669 EXPORT_SYMBOL(dec_node_page_state); 688 #else 670 #else 689 /* 671 /* 690 * Use interrupt disable to serialize counter 672 * Use interrupt disable to serialize counter updates 691 */ 673 */ 692 void mod_zone_page_state(struct zone *zone, en 674 void mod_zone_page_state(struct zone *zone, enum zone_stat_item item, 693 long delta) 675 long delta) 694 { 676 { 695 unsigned long flags; 677 unsigned long flags; 696 678 697 local_irq_save(flags); 679 local_irq_save(flags); 698 __mod_zone_page_state(zone, item, delt 680 __mod_zone_page_state(zone, item, delta); 699 local_irq_restore(flags); 681 local_irq_restore(flags); 700 } 682 } 701 EXPORT_SYMBOL(mod_zone_page_state); 683 EXPORT_SYMBOL(mod_zone_page_state); 702 684 703 
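/*
 * Editor's note: the block below is an illustrative userspace sketch, not
 * part of vmstat.c.  It restates the per-CPU differential scheme used by
 * __mod_zone_page_state()/mod_zone_state() above in plain C11 so the
 * threshold-and-fold behaviour can be compiled and run on its own
 * (cc -std=c11 demo.c).  All names here (demo_mod, DEMO_THRESHOLD,
 * global_counter, percpu_diff) are invented for the example; the kernel
 * keeps one such diff per CPU and per item, updated with this_cpu ops,
 * and its real entry points are the mod_*_page_state() family in this file.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdatomic.h>

#define DEMO_THRESHOLD	32	/* stands in for pcp->stat_threshold */

static atomic_long global_counter = 0;	/* stands in for zone->vm_stat[item] */
static long percpu_diff;		/* stands in for pcp->vm_stat_diff[item] */

/*
 * Mirror of the __mod_zone_page_state() logic: apply the delta to a cheap
 * local diff and fold it into the shared atomic only once the local drift
 * exceeds the threshold.  In the kernel, readers of the global counter may
 * therefore see a value that is off by up to (online CPUs * threshold),
 * which is exactly the drift that percpu_drift_mark guards against above.
 */
static void demo_mod(long delta)
{
	long x = percpu_diff + delta;

	if (labs(x) > DEMO_THRESHOLD) {
		atomic_fetch_add(&global_counter, x);
		x = 0;
	}
	percpu_diff = x;
}

int main(void)
{
	for (int i = 0; i < 1000; i++)
		demo_mod((i % 3) ? 1 : -1);

	printf("folded total %ld, still-unfolded diff %ld\n",
	       (long)atomic_load(&global_counter), percpu_diff);
	return 0;
}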
void inc_zone_page_state(struct page *page, en 685 void inc_zone_page_state(struct page *page, enum zone_stat_item item) 704 { 686 { 705 unsigned long flags; 687 unsigned long flags; 706 struct zone *zone; 688 struct zone *zone; 707 689 708 zone = page_zone(page); 690 zone = page_zone(page); 709 local_irq_save(flags); 691 local_irq_save(flags); 710 __inc_zone_state(zone, item); 692 __inc_zone_state(zone, item); 711 local_irq_restore(flags); 693 local_irq_restore(flags); 712 } 694 } 713 EXPORT_SYMBOL(inc_zone_page_state); 695 EXPORT_SYMBOL(inc_zone_page_state); 714 696 715 void dec_zone_page_state(struct page *page, en 697 void dec_zone_page_state(struct page *page, enum zone_stat_item item) 716 { 698 { 717 unsigned long flags; 699 unsigned long flags; 718 700 719 local_irq_save(flags); 701 local_irq_save(flags); 720 __dec_zone_page_state(page, item); 702 __dec_zone_page_state(page, item); 721 local_irq_restore(flags); 703 local_irq_restore(flags); 722 } 704 } 723 EXPORT_SYMBOL(dec_zone_page_state); 705 EXPORT_SYMBOL(dec_zone_page_state); 724 706 725 void inc_node_state(struct pglist_data *pgdat, 707 void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) 726 { 708 { 727 unsigned long flags; 709 unsigned long flags; 728 710 729 local_irq_save(flags); 711 local_irq_save(flags); 730 __inc_node_state(pgdat, item); 712 __inc_node_state(pgdat, item); 731 local_irq_restore(flags); 713 local_irq_restore(flags); 732 } 714 } 733 EXPORT_SYMBOL(inc_node_state); 715 EXPORT_SYMBOL(inc_node_state); 734 716 735 void mod_node_page_state(struct pglist_data *p 717 void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, 736 long d 718 long delta) 737 { 719 { 738 unsigned long flags; 720 unsigned long flags; 739 721 740 local_irq_save(flags); 722 local_irq_save(flags); 741 __mod_node_page_state(pgdat, item, del 723 __mod_node_page_state(pgdat, item, delta); 742 local_irq_restore(flags); 724 local_irq_restore(flags); 743 } 725 } 744 EXPORT_SYMBOL(mod_node_page_state); 726 EXPORT_SYMBOL(mod_node_page_state); 745 727 746 void inc_node_page_state(struct page *page, en 728 void inc_node_page_state(struct page *page, enum node_stat_item item) 747 { 729 { 748 unsigned long flags; 730 unsigned long flags; 749 struct pglist_data *pgdat; 731 struct pglist_data *pgdat; 750 732 751 pgdat = page_pgdat(page); 733 pgdat = page_pgdat(page); 752 local_irq_save(flags); 734 local_irq_save(flags); 753 __inc_node_state(pgdat, item); 735 __inc_node_state(pgdat, item); 754 local_irq_restore(flags); 736 local_irq_restore(flags); 755 } 737 } 756 EXPORT_SYMBOL(inc_node_page_state); 738 EXPORT_SYMBOL(inc_node_page_state); 757 739 758 void dec_node_page_state(struct page *page, en 740 void dec_node_page_state(struct page *page, enum node_stat_item item) 759 { 741 { 760 unsigned long flags; 742 unsigned long flags; 761 743 762 local_irq_save(flags); 744 local_irq_save(flags); 763 __dec_node_page_state(page, item); 745 __dec_node_page_state(page, item); 764 local_irq_restore(flags); 746 local_irq_restore(flags); 765 } 747 } 766 EXPORT_SYMBOL(dec_node_page_state); 748 EXPORT_SYMBOL(dec_node_page_state); 767 #endif 749 #endif 768 750 769 /* 751 /* 770 * Fold a differential into the global counter 752 * Fold a differential into the global counters. 771 * Returns the number of counters updated. 753 * Returns the number of counters updated. 
772 */ 754 */ 773 static int fold_diff(int *zone_diff, int *node 755 static int fold_diff(int *zone_diff, int *node_diff) 774 { 756 { 775 int i; 757 int i; 776 int changes = 0; 758 int changes = 0; 777 759 778 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; 760 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) 779 if (zone_diff[i]) { 761 if (zone_diff[i]) { 780 atomic_long_add(zone_d 762 atomic_long_add(zone_diff[i], &vm_zone_stat[i]); 781 changes++; 763 changes++; 782 } 764 } 783 765 784 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; 766 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) 785 if (node_diff[i]) { 767 if (node_diff[i]) { 786 atomic_long_add(node_d 768 atomic_long_add(node_diff[i], &vm_node_stat[i]); 787 changes++; 769 changes++; 788 } 770 } 789 return changes; 771 return changes; 790 } 772 } 791 773 >> 774 #ifdef CONFIG_NUMA >> 775 static void fold_vm_zone_numa_events(struct zone *zone) >> 776 { >> 777 unsigned long zone_numa_events[NR_VM_NUMA_EVENT_ITEMS] = { 0, }; >> 778 int cpu; >> 779 enum numa_stat_item item; >> 780 >> 781 for_each_online_cpu(cpu) { >> 782 struct per_cpu_zonestat *pzstats; >> 783 >> 784 pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu); >> 785 for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) >> 786 zone_numa_events[item] += xchg(&pzstats->vm_numa_event[item], 0); >> 787 } >> 788 >> 789 for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) >> 790 zone_numa_event_add(zone_numa_events[item], zone, item); >> 791 } >> 792 >> 793 void fold_vm_numa_events(void) >> 794 { >> 795 struct zone *zone; >> 796 >> 797 for_each_populated_zone(zone) >> 798 fold_vm_zone_numa_events(zone); >> 799 } >> 800 #endif >> 801 792 /* 802 /* 793 * Update the zone counters for the current cp 803 * Update the zone counters for the current cpu. 794 * 804 * 795 * Note that refresh_cpu_vm_stats strives to o 805 * Note that refresh_cpu_vm_stats strives to only access 796 * node local memory. The per cpu pagesets on 806 * node local memory. The per cpu pagesets on remote zones are placed 797 * in the memory local to the processor using 807 * in the memory local to the processor using that pageset. So the 798 * loop over all zones will access a series of 808 * loop over all zones will access a series of cachelines local to 799 * the processor. 809 * the processor. 800 * 810 * 801 * The call to zone_page_state_add updates the 811 * The call to zone_page_state_add updates the cachelines with the 802 * statistics in the remote zone struct as wel 812 * statistics in the remote zone struct as well as the global cachelines 803 * with the global counters. These could cause 813 * with the global counters. These could cause remote node cache line 804 * bouncing and will have to be only done when 814 * bouncing and will have to be only done when necessary. 805 * 815 * 806 * The function returns the number of global c 816 * The function returns the number of global counters updated. 
807 */ 817 */ 808 static int refresh_cpu_vm_stats(bool do_pagese 818 static int refresh_cpu_vm_stats(bool do_pagesets) 809 { 819 { 810 struct pglist_data *pgdat; 820 struct pglist_data *pgdat; 811 struct zone *zone; 821 struct zone *zone; 812 int i; 822 int i; 813 int global_zone_diff[NR_VM_ZONE_STAT_I 823 int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; 814 int global_node_diff[NR_VM_NODE_STAT_I 824 int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; 815 int changes = 0; 825 int changes = 0; 816 826 817 for_each_populated_zone(zone) { 827 for_each_populated_zone(zone) { 818 struct per_cpu_zonestat __perc 828 struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats; >> 829 #ifdef CONFIG_NUMA 819 struct per_cpu_pages __percpu 830 struct per_cpu_pages __percpu *pcp = zone->per_cpu_pageset; >> 831 #endif 820 832 821 for (i = 0; i < NR_VM_ZONE_STA 833 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) { 822 int v; 834 int v; 823 835 824 v = this_cpu_xchg(pzst 836 v = this_cpu_xchg(pzstats->vm_stat_diff[i], 0); 825 if (v) { 837 if (v) { 826 838 827 atomic_long_ad 839 atomic_long_add(v, &zone->vm_stat[i]); 828 global_zone_di 840 global_zone_diff[i] += v; 829 #ifdef CONFIG_NUMA 841 #ifdef CONFIG_NUMA 830 /* 3 seconds i 842 /* 3 seconds idle till flush */ 831 __this_cpu_wri 843 __this_cpu_write(pcp->expire, 3); 832 #endif 844 #endif 833 } 845 } 834 } 846 } >> 847 #ifdef CONFIG_NUMA 835 848 836 if (do_pagesets) { 849 if (do_pagesets) { 837 cond_resched(); 850 cond_resched(); 838 << 839 changes += decay_pcp_h << 840 #ifdef CONFIG_NUMA << 841 /* 851 /* 842 * Deal with draining 852 * Deal with draining the remote pageset of this 843 * processor 853 * processor 844 * 854 * 845 * Check if there are 855 * Check if there are pages remaining in this pageset 846 * if not then there i 856 * if not then there is nothing to expire. 847 */ 857 */ 848 if (!__this_cpu_read(p 858 if (!__this_cpu_read(pcp->expire) || 849 !__this_cpu_rea 859 !__this_cpu_read(pcp->count)) 850 continue; 860 continue; 851 861 852 /* 862 /* 853 * We never drain zone 863 * We never drain zones local to this processor. 854 */ 864 */ 855 if (zone_to_nid(zone) 865 if (zone_to_nid(zone) == numa_node_id()) { 856 __this_cpu_wri 866 __this_cpu_write(pcp->expire, 0); 857 continue; 867 continue; 858 } 868 } 859 869 860 if (__this_cpu_dec_ret !! 870 if (__this_cpu_dec_return(pcp->expire)) 861 changes++; << 862 continue; 871 continue; 863 } << 864 872 865 if (__this_cpu_read(pc 873 if (__this_cpu_read(pcp->count)) { 866 drain_zone_pag 874 drain_zone_pages(zone, this_cpu_ptr(pcp)); 867 changes++; 875 changes++; 868 } 876 } 869 #endif << 870 } 877 } >> 878 #endif 871 } 879 } 872 880 873 for_each_online_pgdat(pgdat) { 881 for_each_online_pgdat(pgdat) { 874 struct per_cpu_nodestat __perc 882 struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats; 875 883 876 for (i = 0; i < NR_VM_NODE_STA 884 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { 877 int v; 885 int v; 878 886 879 v = this_cpu_xchg(p->v 887 v = this_cpu_xchg(p->vm_node_stat_diff[i], 0); 880 if (v) { 888 if (v) { 881 atomic_long_ad 889 atomic_long_add(v, &pgdat->vm_stat[i]); 882 global_node_di 890 global_node_diff[i] += v; 883 } 891 } 884 } 892 } 885 } 893 } 886 894 887 changes += fold_diff(global_zone_diff, 895 changes += fold_diff(global_zone_diff, global_node_diff); 888 return changes; 896 return changes; 889 } 897 } 890 898 891 /* 899 /* 892 * Fold the data for an offline cpu into the g 900 * Fold the data for an offline cpu into the global array. 
893 * There cannot be any access by the offline c 901 * There cannot be any access by the offline cpu and therefore 894 * synchronization is simplified. 902 * synchronization is simplified. 895 */ 903 */ 896 void cpu_vm_stats_fold(int cpu) 904 void cpu_vm_stats_fold(int cpu) 897 { 905 { 898 struct pglist_data *pgdat; 906 struct pglist_data *pgdat; 899 struct zone *zone; 907 struct zone *zone; 900 int i; 908 int i; 901 int global_zone_diff[NR_VM_ZONE_STAT_I 909 int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; 902 int global_node_diff[NR_VM_NODE_STAT_I 910 int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; 903 911 904 for_each_populated_zone(zone) { 912 for_each_populated_zone(zone) { 905 struct per_cpu_zonestat *pzsta 913 struct per_cpu_zonestat *pzstats; 906 914 907 pzstats = per_cpu_ptr(zone->pe 915 pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu); 908 916 909 for (i = 0; i < NR_VM_ZONE_STA 917 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) { 910 if (pzstats->vm_stat_d 918 if (pzstats->vm_stat_diff[i]) { 911 int v; 919 int v; 912 920 913 v = pzstats->v 921 v = pzstats->vm_stat_diff[i]; 914 pzstats->vm_st 922 pzstats->vm_stat_diff[i] = 0; 915 atomic_long_ad 923 atomic_long_add(v, &zone->vm_stat[i]); 916 global_zone_di 924 global_zone_diff[i] += v; 917 } 925 } 918 } 926 } 919 #ifdef CONFIG_NUMA 927 #ifdef CONFIG_NUMA 920 for (i = 0; i < NR_VM_NUMA_EVE 928 for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) { 921 if (pzstats->vm_numa_e 929 if (pzstats->vm_numa_event[i]) { 922 unsigned long 930 unsigned long v; 923 931 924 v = pzstats->v 932 v = pzstats->vm_numa_event[i]; 925 pzstats->vm_nu 933 pzstats->vm_numa_event[i] = 0; 926 zone_numa_even 934 zone_numa_event_add(v, zone, i); 927 } 935 } 928 } 936 } 929 #endif 937 #endif 930 } 938 } 931 939 932 for_each_online_pgdat(pgdat) { 940 for_each_online_pgdat(pgdat) { 933 struct per_cpu_nodestat *p; 941 struct per_cpu_nodestat *p; 934 942 935 p = per_cpu_ptr(pgdat->per_cpu 943 p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu); 936 944 937 for (i = 0; i < NR_VM_NODE_STA 945 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) 938 if (p->vm_node_stat_di 946 if (p->vm_node_stat_diff[i]) { 939 int v; 947 int v; 940 948 941 v = p->vm_node 949 v = p->vm_node_stat_diff[i]; 942 p->vm_node_sta 950 p->vm_node_stat_diff[i] = 0; 943 atomic_long_ad 951 atomic_long_add(v, &pgdat->vm_stat[i]); 944 global_node_di 952 global_node_diff[i] += v; 945 } 953 } 946 } 954 } 947 955 948 fold_diff(global_zone_diff, global_nod 956 fold_diff(global_zone_diff, global_node_diff); 949 } 957 } 950 958 951 /* 959 /* 952 * this is only called if !populated_zone(zone 960 * this is only called if !populated_zone(zone), which implies no other users of 953 * pset->vm_stat_diff[] exist. 961 * pset->vm_stat_diff[] exist. 
954 */ 962 */ 955 void drain_zonestat(struct zone *zone, struct 963 void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats) 956 { 964 { 957 unsigned long v; 965 unsigned long v; 958 int i; 966 int i; 959 967 960 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; 968 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) { 961 if (pzstats->vm_stat_diff[i]) 969 if (pzstats->vm_stat_diff[i]) { 962 v = pzstats->vm_stat_d 970 v = pzstats->vm_stat_diff[i]; 963 pzstats->vm_stat_diff[ 971 pzstats->vm_stat_diff[i] = 0; 964 zone_page_state_add(v, 972 zone_page_state_add(v, zone, i); 965 } 973 } 966 } 974 } 967 975 968 #ifdef CONFIG_NUMA 976 #ifdef CONFIG_NUMA 969 for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS 977 for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) { 970 if (pzstats->vm_numa_event[i]) 978 if (pzstats->vm_numa_event[i]) { 971 v = pzstats->vm_numa_e 979 v = pzstats->vm_numa_event[i]; 972 pzstats->vm_numa_event 980 pzstats->vm_numa_event[i] = 0; 973 zone_numa_event_add(v, 981 zone_numa_event_add(v, zone, i); 974 } 982 } 975 } 983 } 976 #endif 984 #endif 977 } 985 } 978 #endif 986 #endif 979 987 980 #ifdef CONFIG_NUMA 988 #ifdef CONFIG_NUMA 981 /* 989 /* 982 * Determine the per node value of a stat item 990 * Determine the per node value of a stat item. This function 983 * is called frequently in a NUMA machine, so 991 * is called frequently in a NUMA machine, so try to be as 984 * frugal as possible. 992 * frugal as possible. 985 */ 993 */ 986 unsigned long sum_zone_node_page_state(int nod 994 unsigned long sum_zone_node_page_state(int node, 987 enum zone_sta 995 enum zone_stat_item item) 988 { 996 { 989 struct zone *zones = NODE_DATA(node)-> 997 struct zone *zones = NODE_DATA(node)->node_zones; 990 int i; 998 int i; 991 unsigned long count = 0; 999 unsigned long count = 0; 992 1000 993 for (i = 0; i < MAX_NR_ZONES; i++) 1001 for (i = 0; i < MAX_NR_ZONES; i++) 994 count += zone_page_state(zones 1002 count += zone_page_state(zones + i, item); 995 1003 996 return count; 1004 return count; 997 } 1005 } 998 1006 999 /* Determine the per node value of a numa stat 1007 /* Determine the per node value of a numa stat item. */ 1000 unsigned long sum_zone_numa_event_state(int n 1008 unsigned long sum_zone_numa_event_state(int node, 1001 enum numa_st 1009 enum numa_stat_item item) 1002 { 1010 { 1003 struct zone *zones = NODE_DATA(node)- 1011 struct zone *zones = NODE_DATA(node)->node_zones; 1004 unsigned long count = 0; 1012 unsigned long count = 0; 1005 int i; 1013 int i; 1006 1014 1007 for (i = 0; i < MAX_NR_ZONES; i++) 1015 for (i = 0; i < MAX_NR_ZONES; i++) 1008 count += zone_numa_event_stat 1016 count += zone_numa_event_state(zones + i, item); 1009 1017 1010 return count; 1018 return count; 1011 } 1019 } 1012 1020 1013 /* 1021 /* 1014 * Determine the per node value of a stat ite 1022 * Determine the per node value of a stat item. 
1015 */ 1023 */ 1016 unsigned long node_page_state_pages(struct pg 1024 unsigned long node_page_state_pages(struct pglist_data *pgdat, 1017 enum node 1025 enum node_stat_item item) 1018 { 1026 { 1019 long x = atomic_long_read(&pgdat->vm_ 1027 long x = atomic_long_read(&pgdat->vm_stat[item]); 1020 #ifdef CONFIG_SMP 1028 #ifdef CONFIG_SMP 1021 if (x < 0) 1029 if (x < 0) 1022 x = 0; 1030 x = 0; 1023 #endif 1031 #endif 1024 return x; 1032 return x; 1025 } 1033 } 1026 1034 1027 unsigned long node_page_state(struct pglist_d 1035 unsigned long node_page_state(struct pglist_data *pgdat, 1028 enum node_stat_ 1036 enum node_stat_item item) 1029 { 1037 { 1030 VM_WARN_ON_ONCE(vmstat_item_in_bytes( 1038 VM_WARN_ON_ONCE(vmstat_item_in_bytes(item)); 1031 1039 1032 return node_page_state_pages(pgdat, i 1040 return node_page_state_pages(pgdat, item); 1033 } 1041 } 1034 #endif 1042 #endif 1035 1043 1036 /* << 1037 * Count number of pages "struct page" and "s << 1038 * nr_memmap_boot_pages: # of pages allocated << 1039 * nr_memmap_pages: # of pages that were allo << 1040 */ << 1041 static atomic_long_t nr_memmap_boot_pages = A << 1042 static atomic_long_t nr_memmap_pages = ATOMIC << 1043 << 1044 void memmap_boot_pages_add(long delta) << 1045 { << 1046 atomic_long_add(delta, &nr_memmap_boo << 1047 } << 1048 << 1049 void memmap_pages_add(long delta) << 1050 { << 1051 atomic_long_add(delta, &nr_memmap_pag << 1052 } << 1053 << 1054 #ifdef CONFIG_COMPACTION 1044 #ifdef CONFIG_COMPACTION 1055 1045 1056 struct contig_page_info { 1046 struct contig_page_info { 1057 unsigned long free_pages; 1047 unsigned long free_pages; 1058 unsigned long free_blocks_total; 1048 unsigned long free_blocks_total; 1059 unsigned long free_blocks_suitable; 1049 unsigned long free_blocks_suitable; 1060 }; 1050 }; 1061 1051 1062 /* 1052 /* 1063 * Calculate the number of free pages in a zo 1053 * Calculate the number of free pages in a zone, how many contiguous 1064 * pages are free and how many are large enou 1054 * pages are free and how many are large enough to satisfy an allocation of 1065 * the target size. Note that this function m 1055 * the target size. Note that this function makes no attempt to estimate 1066 * how many suitable free blocks there *might 1056 * how many suitable free blocks there *might* be if MOVABLE pages were 1067 * migrated. Calculating that is possible, bu 1057 * migrated. Calculating that is possible, but expensive and can be 1068 * figured out from userspace 1058 * figured out from userspace 1069 */ 1059 */ 1070 static void fill_contig_page_info(struct zone 1060 static void fill_contig_page_info(struct zone *zone, 1071 unsigned int 1061 unsigned int suitable_order, 1072 struct contig 1062 struct contig_page_info *info) 1073 { 1063 { 1074 unsigned int order; 1064 unsigned int order; 1075 1065 1076 info->free_pages = 0; 1066 info->free_pages = 0; 1077 info->free_blocks_total = 0; 1067 info->free_blocks_total = 0; 1078 info->free_blocks_suitable = 0; 1068 info->free_blocks_suitable = 0; 1079 1069 1080 for (order = 0; order < NR_PAGE_ORDER !! 1070 for (order = 0; order < MAX_ORDER; order++) { 1081 unsigned long blocks; 1071 unsigned long blocks; 1082 1072 1083 /* !! 1073 /* Count number of free blocks */ 1084 * Count number of free block !! 1074 blocks = zone->free_area[order].nr_free; 1085 * << 1086 * Access to nr_free is lockl << 1087 * diagnostic purposes. 
Use d << 1088 */ << 1089 blocks = data_race(zone->free << 1090 info->free_blocks_total += bl 1075 info->free_blocks_total += blocks; 1091 1076 1092 /* Count free base pages */ 1077 /* Count free base pages */ 1093 info->free_pages += blocks << 1078 info->free_pages += blocks << order; 1094 1079 1095 /* Count the suitable free bl 1080 /* Count the suitable free blocks */ 1096 if (order >= suitable_order) 1081 if (order >= suitable_order) 1097 info->free_blocks_sui 1082 info->free_blocks_suitable += blocks << 1098 1083 (order - suitable_order); 1099 } 1084 } 1100 } 1085 } 1101 1086 1102 /* 1087 /* 1103 * A fragmentation index only makes sense if 1088 * A fragmentation index only makes sense if an allocation of a requested 1104 * size would fail. If that is true, the frag 1089 * size would fail. If that is true, the fragmentation index indicates 1105 * whether external fragmentation or a lack o 1090 * whether external fragmentation or a lack of memory was the problem. 1106 * The value can be used to determine if page 1091 * The value can be used to determine if page reclaim or compaction 1107 * should be used 1092 * should be used 1108 */ 1093 */ 1109 static int __fragmentation_index(unsigned int 1094 static int __fragmentation_index(unsigned int order, struct contig_page_info *info) 1110 { 1095 { 1111 unsigned long requested = 1UL << orde 1096 unsigned long requested = 1UL << order; 1112 1097 1113 if (WARN_ON_ONCE(order > MAX_PAGE_ORD !! 1098 if (WARN_ON_ONCE(order >= MAX_ORDER)) 1114 return 0; 1099 return 0; 1115 1100 1116 if (!info->free_blocks_total) 1101 if (!info->free_blocks_total) 1117 return 0; 1102 return 0; 1118 1103 1119 /* Fragmentation index only makes sen 1104 /* Fragmentation index only makes sense when a request would fail */ 1120 if (info->free_blocks_suitable) 1105 if (info->free_blocks_suitable) 1121 return -1000; 1106 return -1000; 1122 1107 1123 /* 1108 /* 1124 * Index is between 0 and 1 so return 1109 * Index is between 0 and 1 so return within 3 decimal places 1125 * 1110 * 1126 * 0 => allocation would fail due to 1111 * 0 => allocation would fail due to lack of memory 1127 * 1 => allocation would fail due to 1112 * 1 => allocation would fail due to fragmentation 1128 */ 1113 */ 1129 return 1000 - div_u64( (1000+(div_u64 1114 return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total); 1130 } 1115 } 1131 1116 1132 /* 1117 /* 1133 * Calculates external fragmentation within a 1118 * Calculates external fragmentation within a zone wrt the given order. 1134 * It is defined as the percentage of pages f 1119 * It is defined as the percentage of pages found in blocks of size 1135 * less than 1 << order. It returns values in 1120 * less than 1 << order. It returns values in range [0, 100]. 
1136 */ 1121 */ 1137 unsigned int extfrag_for_order(struct zone *z 1122 unsigned int extfrag_for_order(struct zone *zone, unsigned int order) 1138 { 1123 { 1139 struct contig_page_info info; 1124 struct contig_page_info info; 1140 1125 1141 fill_contig_page_info(zone, order, &i 1126 fill_contig_page_info(zone, order, &info); 1142 if (info.free_pages == 0) 1127 if (info.free_pages == 0) 1143 return 0; 1128 return 0; 1144 1129 1145 return div_u64((info.free_pages - 1130 return div_u64((info.free_pages - 1146 (info.free_blocks_sui 1131 (info.free_blocks_suitable << order)) * 100, 1147 info.free_pages); 1132 info.free_pages); 1148 } 1133 } 1149 1134 1150 /* Same as __fragmentation index but allocs c 1135 /* Same as __fragmentation index but allocs contig_page_info on stack */ 1151 int fragmentation_index(struct zone *zone, un 1136 int fragmentation_index(struct zone *zone, unsigned int order) 1152 { 1137 { 1153 struct contig_page_info info; 1138 struct contig_page_info info; 1154 1139 1155 fill_contig_page_info(zone, order, &i 1140 fill_contig_page_info(zone, order, &info); 1156 return __fragmentation_index(order, & 1141 return __fragmentation_index(order, &info); 1157 } 1142 } 1158 #endif 1143 #endif 1159 1144 1160 #if defined(CONFIG_PROC_FS) || defined(CONFIG 1145 #if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || \ 1161 defined(CONFIG_NUMA) || defined(CONFIG_ME 1146 defined(CONFIG_NUMA) || defined(CONFIG_MEMCG) 1162 #ifdef CONFIG_ZONE_DMA 1147 #ifdef CONFIG_ZONE_DMA 1163 #define TEXT_FOR_DMA(xx) xx "_dma", 1148 #define TEXT_FOR_DMA(xx) xx "_dma", 1164 #else 1149 #else 1165 #define TEXT_FOR_DMA(xx) 1150 #define TEXT_FOR_DMA(xx) 1166 #endif 1151 #endif 1167 1152 1168 #ifdef CONFIG_ZONE_DMA32 1153 #ifdef CONFIG_ZONE_DMA32 1169 #define TEXT_FOR_DMA32(xx) xx "_dma32", 1154 #define TEXT_FOR_DMA32(xx) xx "_dma32", 1170 #else 1155 #else 1171 #define TEXT_FOR_DMA32(xx) 1156 #define TEXT_FOR_DMA32(xx) 1172 #endif 1157 #endif 1173 1158 1174 #ifdef CONFIG_HIGHMEM 1159 #ifdef CONFIG_HIGHMEM 1175 #define TEXT_FOR_HIGHMEM(xx) xx "_high", 1160 #define TEXT_FOR_HIGHMEM(xx) xx "_high", 1176 #else 1161 #else 1177 #define TEXT_FOR_HIGHMEM(xx) 1162 #define TEXT_FOR_HIGHMEM(xx) 1178 #endif 1163 #endif 1179 1164 1180 #ifdef CONFIG_ZONE_DEVICE << 1181 #define TEXT_FOR_DEVICE(xx) xx "_device", << 1182 #else << 1183 #define TEXT_FOR_DEVICE(xx) << 1184 #endif << 1185 << 1186 #define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) 1165 #define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \ 1187 TEXT_ !! 
1166 TEXT_FOR_HIGHMEM(xx) xx "_movable", 1188 TEXT_ << 1189 1167 1190 const char * const vmstat_text[] = { 1168 const char * const vmstat_text[] = { 1191 /* enum zone_stat_item counters */ 1169 /* enum zone_stat_item counters */ 1192 "nr_free_pages", 1170 "nr_free_pages", 1193 "nr_zone_inactive_anon", 1171 "nr_zone_inactive_anon", 1194 "nr_zone_active_anon", 1172 "nr_zone_active_anon", 1195 "nr_zone_inactive_file", 1173 "nr_zone_inactive_file", 1196 "nr_zone_active_file", 1174 "nr_zone_active_file", 1197 "nr_zone_unevictable", 1175 "nr_zone_unevictable", 1198 "nr_zone_write_pending", 1176 "nr_zone_write_pending", 1199 "nr_mlock", 1177 "nr_mlock", 1200 "nr_bounce", 1178 "nr_bounce", 1201 #if IS_ENABLED(CONFIG_ZSMALLOC) 1179 #if IS_ENABLED(CONFIG_ZSMALLOC) 1202 "nr_zspages", 1180 "nr_zspages", 1203 #endif 1181 #endif 1204 "nr_free_cma", 1182 "nr_free_cma", 1205 #ifdef CONFIG_UNACCEPTED_MEMORY << 1206 "nr_unaccepted", << 1207 #endif << 1208 1183 1209 /* enum numa_stat_item counters */ 1184 /* enum numa_stat_item counters */ 1210 #ifdef CONFIG_NUMA 1185 #ifdef CONFIG_NUMA 1211 "numa_hit", 1186 "numa_hit", 1212 "numa_miss", 1187 "numa_miss", 1213 "numa_foreign", 1188 "numa_foreign", 1214 "numa_interleave", 1189 "numa_interleave", 1215 "numa_local", 1190 "numa_local", 1216 "numa_other", 1191 "numa_other", 1217 #endif 1192 #endif 1218 1193 1219 /* enum node_stat_item counters */ 1194 /* enum node_stat_item counters */ 1220 "nr_inactive_anon", 1195 "nr_inactive_anon", 1221 "nr_active_anon", 1196 "nr_active_anon", 1222 "nr_inactive_file", 1197 "nr_inactive_file", 1223 "nr_active_file", 1198 "nr_active_file", 1224 "nr_unevictable", 1199 "nr_unevictable", 1225 "nr_slab_reclaimable", 1200 "nr_slab_reclaimable", 1226 "nr_slab_unreclaimable", 1201 "nr_slab_unreclaimable", 1227 "nr_isolated_anon", 1202 "nr_isolated_anon", 1228 "nr_isolated_file", 1203 "nr_isolated_file", 1229 "workingset_nodes", 1204 "workingset_nodes", 1230 "workingset_refault_anon", 1205 "workingset_refault_anon", 1231 "workingset_refault_file", 1206 "workingset_refault_file", 1232 "workingset_activate_anon", 1207 "workingset_activate_anon", 1233 "workingset_activate_file", 1208 "workingset_activate_file", 1234 "workingset_restore_anon", 1209 "workingset_restore_anon", 1235 "workingset_restore_file", 1210 "workingset_restore_file", 1236 "workingset_nodereclaim", 1211 "workingset_nodereclaim", 1237 "nr_anon_pages", 1212 "nr_anon_pages", 1238 "nr_mapped", 1213 "nr_mapped", 1239 "nr_file_pages", 1214 "nr_file_pages", 1240 "nr_dirty", 1215 "nr_dirty", 1241 "nr_writeback", 1216 "nr_writeback", 1242 "nr_writeback_temp", 1217 "nr_writeback_temp", 1243 "nr_shmem", 1218 "nr_shmem", 1244 "nr_shmem_hugepages", 1219 "nr_shmem_hugepages", 1245 "nr_shmem_pmdmapped", 1220 "nr_shmem_pmdmapped", 1246 "nr_file_hugepages", 1221 "nr_file_hugepages", 1247 "nr_file_pmdmapped", 1222 "nr_file_pmdmapped", 1248 "nr_anon_transparent_hugepages", 1223 "nr_anon_transparent_hugepages", 1249 "nr_vmscan_write", 1224 "nr_vmscan_write", 1250 "nr_vmscan_immediate_reclaim", 1225 "nr_vmscan_immediate_reclaim", 1251 "nr_dirtied", 1226 "nr_dirtied", 1252 "nr_written", 1227 "nr_written", 1253 "nr_throttled_written", << 1254 "nr_kernel_misc_reclaimable", 1228 "nr_kernel_misc_reclaimable", 1255 "nr_foll_pin_acquired", 1229 "nr_foll_pin_acquired", 1256 "nr_foll_pin_released", 1230 "nr_foll_pin_released", 1257 "nr_kernel_stack", 1231 "nr_kernel_stack", 1258 #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) 1232 #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) 1259 "nr_shadow_call_stack", 1233 
"nr_shadow_call_stack", 1260 #endif 1234 #endif 1261 "nr_page_table_pages", 1235 "nr_page_table_pages", 1262 "nr_sec_page_table_pages", << 1263 #ifdef CONFIG_IOMMU_SUPPORT << 1264 "nr_iommu_pages", << 1265 #endif << 1266 #ifdef CONFIG_SWAP 1236 #ifdef CONFIG_SWAP 1267 "nr_swapcached", 1237 "nr_swapcached", 1268 #endif 1238 #endif 1269 #ifdef CONFIG_NUMA_BALANCING !! 1239 1270 "pgpromote_success", !! 1240 /* enum writeback_stat_item counters */ 1271 "pgpromote_candidate", << 1272 #endif << 1273 "pgdemote_kswapd", << 1274 "pgdemote_direct", << 1275 "pgdemote_khugepaged", << 1276 /* system-wide enum vm_stat_item coun << 1277 "nr_dirty_threshold", 1241 "nr_dirty_threshold", 1278 "nr_dirty_background_threshold", 1242 "nr_dirty_background_threshold", 1279 "nr_memmap_pages", << 1280 "nr_memmap_boot_pages", << 1281 1243 1282 #if defined(CONFIG_VM_EVENT_COUNTERS) || defi 1244 #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) 1283 /* enum vm_event_item counters */ 1245 /* enum vm_event_item counters */ 1284 "pgpgin", 1246 "pgpgin", 1285 "pgpgout", 1247 "pgpgout", 1286 "pswpin", 1248 "pswpin", 1287 "pswpout", 1249 "pswpout", 1288 1250 1289 TEXTS_FOR_ZONES("pgalloc") 1251 TEXTS_FOR_ZONES("pgalloc") 1290 TEXTS_FOR_ZONES("allocstall") 1252 TEXTS_FOR_ZONES("allocstall") 1291 TEXTS_FOR_ZONES("pgskip") 1253 TEXTS_FOR_ZONES("pgskip") 1292 1254 1293 "pgfree", 1255 "pgfree", 1294 "pgactivate", 1256 "pgactivate", 1295 "pgdeactivate", 1257 "pgdeactivate", 1296 "pglazyfree", 1258 "pglazyfree", 1297 1259 1298 "pgfault", 1260 "pgfault", 1299 "pgmajfault", 1261 "pgmajfault", 1300 "pglazyfreed", 1262 "pglazyfreed", 1301 1263 1302 "pgrefill", 1264 "pgrefill", 1303 "pgreuse", 1265 "pgreuse", 1304 "pgsteal_kswapd", 1266 "pgsteal_kswapd", 1305 "pgsteal_direct", 1267 "pgsteal_direct", 1306 "pgsteal_khugepaged", !! 
1268 "pgdemote_kswapd", >> 1269 "pgdemote_direct", 1307 "pgscan_kswapd", 1270 "pgscan_kswapd", 1308 "pgscan_direct", 1271 "pgscan_direct", 1309 "pgscan_khugepaged", << 1310 "pgscan_direct_throttle", 1272 "pgscan_direct_throttle", 1311 "pgscan_anon", 1273 "pgscan_anon", 1312 "pgscan_file", 1274 "pgscan_file", 1313 "pgsteal_anon", 1275 "pgsteal_anon", 1314 "pgsteal_file", 1276 "pgsteal_file", 1315 1277 1316 #ifdef CONFIG_NUMA 1278 #ifdef CONFIG_NUMA 1317 "zone_reclaim_success", << 1318 "zone_reclaim_failed", 1279 "zone_reclaim_failed", 1319 #endif 1280 #endif 1320 "pginodesteal", 1281 "pginodesteal", 1321 "slabs_scanned", 1282 "slabs_scanned", 1322 "kswapd_inodesteal", 1283 "kswapd_inodesteal", 1323 "kswapd_low_wmark_hit_quickly", 1284 "kswapd_low_wmark_hit_quickly", 1324 "kswapd_high_wmark_hit_quickly", 1285 "kswapd_high_wmark_hit_quickly", 1325 "pageoutrun", 1286 "pageoutrun", 1326 1287 1327 "pgrotated", 1288 "pgrotated", 1328 1289 1329 "drop_pagecache", 1290 "drop_pagecache", 1330 "drop_slab", 1291 "drop_slab", 1331 "oom_kill", 1292 "oom_kill", 1332 1293 1333 #ifdef CONFIG_NUMA_BALANCING 1294 #ifdef CONFIG_NUMA_BALANCING 1334 "numa_pte_updates", 1295 "numa_pte_updates", 1335 "numa_huge_pte_updates", 1296 "numa_huge_pte_updates", 1336 "numa_hint_faults", 1297 "numa_hint_faults", 1337 "numa_hint_faults_local", 1298 "numa_hint_faults_local", 1338 "numa_pages_migrated", 1299 "numa_pages_migrated", 1339 #endif 1300 #endif 1340 #ifdef CONFIG_MIGRATION 1301 #ifdef CONFIG_MIGRATION 1341 "pgmigrate_success", 1302 "pgmigrate_success", 1342 "pgmigrate_fail", 1303 "pgmigrate_fail", 1343 "thp_migration_success", 1304 "thp_migration_success", 1344 "thp_migration_fail", 1305 "thp_migration_fail", 1345 "thp_migration_split", 1306 "thp_migration_split", 1346 #endif 1307 #endif 1347 #ifdef CONFIG_COMPACTION 1308 #ifdef CONFIG_COMPACTION 1348 "compact_migrate_scanned", 1309 "compact_migrate_scanned", 1349 "compact_free_scanned", 1310 "compact_free_scanned", 1350 "compact_isolated", 1311 "compact_isolated", 1351 "compact_stall", 1312 "compact_stall", 1352 "compact_fail", 1313 "compact_fail", 1353 "compact_success", 1314 "compact_success", 1354 "compact_daemon_wake", 1315 "compact_daemon_wake", 1355 "compact_daemon_migrate_scanned", 1316 "compact_daemon_migrate_scanned", 1356 "compact_daemon_free_scanned", 1317 "compact_daemon_free_scanned", 1357 #endif 1318 #endif 1358 1319 1359 #ifdef CONFIG_HUGETLB_PAGE 1320 #ifdef CONFIG_HUGETLB_PAGE 1360 "htlb_buddy_alloc_success", 1321 "htlb_buddy_alloc_success", 1361 "htlb_buddy_alloc_fail", 1322 "htlb_buddy_alloc_fail", 1362 #endif 1323 #endif 1363 #ifdef CONFIG_CMA 1324 #ifdef CONFIG_CMA 1364 "cma_alloc_success", 1325 "cma_alloc_success", 1365 "cma_alloc_fail", 1326 "cma_alloc_fail", 1366 #endif 1327 #endif 1367 "unevictable_pgs_culled", 1328 "unevictable_pgs_culled", 1368 "unevictable_pgs_scanned", 1329 "unevictable_pgs_scanned", 1369 "unevictable_pgs_rescued", 1330 "unevictable_pgs_rescued", 1370 "unevictable_pgs_mlocked", 1331 "unevictable_pgs_mlocked", 1371 "unevictable_pgs_munlocked", 1332 "unevictable_pgs_munlocked", 1372 "unevictable_pgs_cleared", 1333 "unevictable_pgs_cleared", 1373 "unevictable_pgs_stranded", 1334 "unevictable_pgs_stranded", 1374 1335 1375 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 1336 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 1376 "thp_fault_alloc", 1337 "thp_fault_alloc", 1377 "thp_fault_fallback", 1338 "thp_fault_fallback", 1378 "thp_fault_fallback_charge", 1339 "thp_fault_fallback_charge", 1379 "thp_collapse_alloc", 1340 "thp_collapse_alloc", 1380 
"thp_collapse_alloc_failed", 1341 "thp_collapse_alloc_failed", 1381 "thp_file_alloc", 1342 "thp_file_alloc", 1382 "thp_file_fallback", 1343 "thp_file_fallback", 1383 "thp_file_fallback_charge", 1344 "thp_file_fallback_charge", 1384 "thp_file_mapped", 1345 "thp_file_mapped", 1385 "thp_split_page", 1346 "thp_split_page", 1386 "thp_split_page_failed", 1347 "thp_split_page_failed", 1387 "thp_deferred_split_page", 1348 "thp_deferred_split_page", 1388 "thp_underused_split_page", << 1389 "thp_split_pmd", 1349 "thp_split_pmd", 1390 "thp_scan_exceed_none_pte", << 1391 "thp_scan_exceed_swap_pte", << 1392 "thp_scan_exceed_share_pte", << 1393 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_ 1350 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD 1394 "thp_split_pud", 1351 "thp_split_pud", 1395 #endif 1352 #endif 1396 "thp_zero_page_alloc", 1353 "thp_zero_page_alloc", 1397 "thp_zero_page_alloc_failed", 1354 "thp_zero_page_alloc_failed", 1398 "thp_swpout", 1355 "thp_swpout", 1399 "thp_swpout_fallback", 1356 "thp_swpout_fallback", 1400 #endif 1357 #endif 1401 #ifdef CONFIG_MEMORY_BALLOON 1358 #ifdef CONFIG_MEMORY_BALLOON 1402 "balloon_inflate", 1359 "balloon_inflate", 1403 "balloon_deflate", 1360 "balloon_deflate", 1404 #ifdef CONFIG_BALLOON_COMPACTION 1361 #ifdef CONFIG_BALLOON_COMPACTION 1405 "balloon_migrate", 1362 "balloon_migrate", 1406 #endif 1363 #endif 1407 #endif /* CONFIG_MEMORY_BALLOON */ 1364 #endif /* CONFIG_MEMORY_BALLOON */ 1408 #ifdef CONFIG_DEBUG_TLBFLUSH 1365 #ifdef CONFIG_DEBUG_TLBFLUSH 1409 "nr_tlb_remote_flush", 1366 "nr_tlb_remote_flush", 1410 "nr_tlb_remote_flush_received", 1367 "nr_tlb_remote_flush_received", 1411 "nr_tlb_local_flush_all", 1368 "nr_tlb_local_flush_all", 1412 "nr_tlb_local_flush_one", 1369 "nr_tlb_local_flush_one", 1413 #endif /* CONFIG_DEBUG_TLBFLUSH */ 1370 #endif /* CONFIG_DEBUG_TLBFLUSH */ 1414 1371 >> 1372 #ifdef CONFIG_DEBUG_VM_VMACACHE >> 1373 "vmacache_find_calls", >> 1374 "vmacache_find_hits", >> 1375 #endif 1415 #ifdef CONFIG_SWAP 1376 #ifdef CONFIG_SWAP 1416 "swap_ra", 1377 "swap_ra", 1417 "swap_ra_hit", 1378 "swap_ra_hit", 1418 #ifdef CONFIG_KSM << 1419 "ksm_swpin_copy", << 1420 #endif << 1421 #endif << 1422 #ifdef CONFIG_KSM << 1423 "cow_ksm", << 1424 #endif << 1425 #ifdef CONFIG_ZSWAP << 1426 "zswpin", << 1427 "zswpout", << 1428 "zswpwb", << 1429 #endif 1379 #endif 1430 #ifdef CONFIG_X86 1380 #ifdef CONFIG_X86 1431 "direct_map_level2_splits", 1381 "direct_map_level2_splits", 1432 "direct_map_level3_splits", 1382 "direct_map_level3_splits", 1433 #endif 1383 #endif 1434 #ifdef CONFIG_PER_VMA_LOCK_STATS << 1435 "vma_lock_success", << 1436 "vma_lock_abort", << 1437 "vma_lock_retry", << 1438 "vma_lock_miss", << 1439 #endif << 1440 #ifdef CONFIG_DEBUG_STACK_USAGE << 1441 "kstack_1k", << 1442 #if THREAD_SIZE > 1024 << 1443 "kstack_2k", << 1444 #endif << 1445 #if THREAD_SIZE > 2048 << 1446 "kstack_4k", << 1447 #endif << 1448 #if THREAD_SIZE > 4096 << 1449 "kstack_8k", << 1450 #endif << 1451 #if THREAD_SIZE > 8192 << 1452 "kstack_16k", << 1453 #endif << 1454 #if THREAD_SIZE > 16384 << 1455 "kstack_32k", << 1456 #endif << 1457 #if THREAD_SIZE > 32768 << 1458 "kstack_64k", << 1459 #endif << 1460 #if THREAD_SIZE > 65536 << 1461 "kstack_rest", << 1462 #endif << 1463 #endif << 1464 #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_ 1384 #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ 1465 }; 1385 }; 1466 #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || C 1386 #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */ 1467 1387 1468 #if 
(defined(CONFIG_DEBUG_FS) && defined(CONF 1388 #if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \ 1469 defined(CONFIG_PROC_FS) 1389 defined(CONFIG_PROC_FS) 1470 static void *frag_start(struct seq_file *m, l 1390 static void *frag_start(struct seq_file *m, loff_t *pos) 1471 { 1391 { 1472 pg_data_t *pgdat; 1392 pg_data_t *pgdat; 1473 loff_t node = *pos; 1393 loff_t node = *pos; 1474 1394 1475 for (pgdat = first_online_pgdat(); 1395 for (pgdat = first_online_pgdat(); 1476 pgdat && node; 1396 pgdat && node; 1477 pgdat = next_online_pgdat(pgdat) 1397 pgdat = next_online_pgdat(pgdat)) 1478 --node; 1398 --node; 1479 1399 1480 return pgdat; 1400 return pgdat; 1481 } 1401 } 1482 1402 1483 static void *frag_next(struct seq_file *m, vo 1403 static void *frag_next(struct seq_file *m, void *arg, loff_t *pos) 1484 { 1404 { 1485 pg_data_t *pgdat = (pg_data_t *)arg; 1405 pg_data_t *pgdat = (pg_data_t *)arg; 1486 1406 1487 (*pos)++; 1407 (*pos)++; 1488 return next_online_pgdat(pgdat); 1408 return next_online_pgdat(pgdat); 1489 } 1409 } 1490 1410 1491 static void frag_stop(struct seq_file *m, voi 1411 static void frag_stop(struct seq_file *m, void *arg) 1492 { 1412 { 1493 } 1413 } 1494 1414 1495 /* 1415 /* 1496 * Walk zones in a node and print using a cal 1416 * Walk zones in a node and print using a callback. 1497 * If @assert_populated is true, only use cal 1417 * If @assert_populated is true, only use callback for zones that are populated. 1498 */ 1418 */ 1499 static void walk_zones_in_node(struct seq_fil 1419 static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat, 1500 bool assert_populated, bool n 1420 bool assert_populated, bool nolock, 1501 void (*print)(struct seq_file 1421 void (*print)(struct seq_file *m, pg_data_t *, struct zone *)) 1502 { 1422 { 1503 struct zone *zone; 1423 struct zone *zone; 1504 struct zone *node_zones = pgdat->node 1424 struct zone *node_zones = pgdat->node_zones; 1505 unsigned long flags; 1425 unsigned long flags; 1506 1426 1507 for (zone = node_zones; zone - node_z 1427 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) { 1508 if (assert_populated && !popu 1428 if (assert_populated && !populated_zone(zone)) 1509 continue; 1429 continue; 1510 1430 1511 if (!nolock) 1431 if (!nolock) 1512 spin_lock_irqsave(&zo 1432 spin_lock_irqsave(&zone->lock, flags); 1513 print(m, pgdat, zone); 1433 print(m, pgdat, zone); 1514 if (!nolock) 1434 if (!nolock) 1515 spin_unlock_irqrestor 1435 spin_unlock_irqrestore(&zone->lock, flags); 1516 } 1436 } 1517 } 1437 } 1518 #endif 1438 #endif 1519 1439 1520 #ifdef CONFIG_PROC_FS 1440 #ifdef CONFIG_PROC_FS 1521 static void frag_show_print(struct seq_file * 1441 static void frag_show_print(struct seq_file *m, pg_data_t *pgdat, 1522 1442 struct zone *zone) 1523 { 1443 { 1524 int order; 1444 int order; 1525 1445 1526 seq_printf(m, "Node %d, zone %8s ", p 1446 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); 1527 for (order = 0; order < NR_PAGE_ORDER !! 1447 for (order = 0; order < MAX_ORDER; ++order) 1528 /* !! 1448 seq_printf(m, "%6lu ", zone->free_area[order].nr_free); 1529 * Access to nr_free is lockl << 1530 * printing purposes. Use dat << 1531 */ << 1532 seq_printf(m, "%6lu ", data_r << 1533 seq_putc(m, '\n'); 1449 seq_putc(m, '\n'); 1534 } 1450 } 1535 1451 1536 /* 1452 /* 1537 * This walks the free areas for each zone. 1453 * This walks the free areas for each zone. 
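 *
 * A sample line of the resulting /proc/buddyinfo output (the counts are
 * illustrative only):
 *
 *   Node 0, zone   Normal    480    210     96     17      4      1      0      0      0      0      0
 *
 * one "%6lu" column per order, emitted by frag_show_print() above for
 * every populated zone of every online node.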
1538 */ 1454 */ 1539 static int frag_show(struct seq_file *m, void 1455 static int frag_show(struct seq_file *m, void *arg) 1540 { 1456 { 1541 pg_data_t *pgdat = (pg_data_t *)arg; 1457 pg_data_t *pgdat = (pg_data_t *)arg; 1542 walk_zones_in_node(m, pgdat, true, fa 1458 walk_zones_in_node(m, pgdat, true, false, frag_show_print); 1543 return 0; 1459 return 0; 1544 } 1460 } 1545 1461 1546 static void pagetypeinfo_showfree_print(struc 1462 static void pagetypeinfo_showfree_print(struct seq_file *m, 1547 pg_da 1463 pg_data_t *pgdat, struct zone *zone) 1548 { 1464 { 1549 int order, mtype; 1465 int order, mtype; 1550 1466 1551 for (mtype = 0; mtype < MIGRATE_TYPES 1467 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) { 1552 seq_printf(m, "Node %4d, zone 1468 seq_printf(m, "Node %4d, zone %8s, type %12s ", 1553 pgdat 1469 pgdat->node_id, 1554 zone- 1470 zone->name, 1555 migra 1471 migratetype_names[mtype]); 1556 for (order = 0; order < NR_PA !! 1472 for (order = 0; order < MAX_ORDER; ++order) { 1557 unsigned long freecou 1473 unsigned long freecount = 0; 1558 struct free_area *are 1474 struct free_area *area; 1559 struct list_head *cur 1475 struct list_head *curr; 1560 bool overflow = false 1476 bool overflow = false; 1561 1477 1562 area = &(zone->free_a 1478 area = &(zone->free_area[order]); 1563 1479 1564 list_for_each(curr, & 1480 list_for_each(curr, &area->free_list[mtype]) { 1565 /* 1481 /* 1566 * Cap the fr 1482 * Cap the free_list iteration because it might 1567 * be really 1483 * be really large and we are under a spinlock 1568 * so a long 1484 * so a long time spent here could trigger a 1569 * hard locku 1485 * hard lockup detector. Anyway this is a 1570 * debugging 1486 * debugging tool so knowing there is a handful 1571 * of pages o 1487 * of pages of this order should be more than 1572 * sufficient 1488 * sufficient. 1573 */ 1489 */ 1574 if (++freecou 1490 if (++freecount >= 100000) { 1575 overf 1491 overflow = true; 1576 break 1492 break; 1577 } 1493 } 1578 } 1494 } 1579 seq_printf(m, "%s%6lu 1495 seq_printf(m, "%s%6lu ", overflow ? ">" : "", freecount); 1580 spin_unlock_irq(&zone 1496 spin_unlock_irq(&zone->lock); 1581 cond_resched(); 1497 cond_resched(); 1582 spin_lock_irq(&zone-> 1498 spin_lock_irq(&zone->lock); 1583 } 1499 } 1584 seq_putc(m, '\n'); 1500 seq_putc(m, '\n'); 1585 } 1501 } 1586 } 1502 } 1587 1503 1588 /* Print out the free pages at each order for 1504 /* Print out the free pages at each order for each migatetype */ 1589 static void pagetypeinfo_showfree(struct seq_ 1505 static void pagetypeinfo_showfree(struct seq_file *m, void *arg) 1590 { 1506 { 1591 int order; 1507 int order; 1592 pg_data_t *pgdat = (pg_data_t *)arg; 1508 pg_data_t *pgdat = (pg_data_t *)arg; 1593 1509 1594 /* Print header */ 1510 /* Print header */ 1595 seq_printf(m, "%-43s ", "Free pages c 1511 seq_printf(m, "%-43s ", "Free pages count per migrate type at order"); 1596 for (order = 0; order < NR_PAGE_ORDER !! 
1512 for (order = 0; order < MAX_ORDER; ++order) 1597 seq_printf(m, "%6d ", order); 1513 seq_printf(m, "%6d ", order); 1598 seq_putc(m, '\n'); 1514 seq_putc(m, '\n'); 1599 1515 1600 walk_zones_in_node(m, pgdat, true, fa 1516 walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print); 1601 } 1517 } 1602 1518 1603 static void pagetypeinfo_showblockcount_print 1519 static void pagetypeinfo_showblockcount_print(struct seq_file *m, 1604 pg_da 1520 pg_data_t *pgdat, struct zone *zone) 1605 { 1521 { 1606 int mtype; 1522 int mtype; 1607 unsigned long pfn; 1523 unsigned long pfn; 1608 unsigned long start_pfn = zone->zone_ 1524 unsigned long start_pfn = zone->zone_start_pfn; 1609 unsigned long end_pfn = zone_end_pfn( 1525 unsigned long end_pfn = zone_end_pfn(zone); 1610 unsigned long count[MIGRATE_TYPES] = 1526 unsigned long count[MIGRATE_TYPES] = { 0, }; 1611 1527 1612 for (pfn = start_pfn; pfn < end_pfn; 1528 for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) { 1613 struct page *page; 1529 struct page *page; 1614 1530 1615 page = pfn_to_online_page(pfn 1531 page = pfn_to_online_page(pfn); 1616 if (!page) 1532 if (!page) 1617 continue; 1533 continue; 1618 1534 1619 if (page_zone(page) != zone) 1535 if (page_zone(page) != zone) 1620 continue; 1536 continue; 1621 1537 1622 mtype = get_pageblock_migrate 1538 mtype = get_pageblock_migratetype(page); 1623 1539 1624 if (mtype < MIGRATE_TYPES) 1540 if (mtype < MIGRATE_TYPES) 1625 count[mtype]++; 1541 count[mtype]++; 1626 } 1542 } 1627 1543 1628 /* Print counts */ 1544 /* Print counts */ 1629 seq_printf(m, "Node %d, zone %8s ", p 1545 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); 1630 for (mtype = 0; mtype < MIGRATE_TYPES 1546 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) 1631 seq_printf(m, "%12lu ", count 1547 seq_printf(m, "%12lu ", count[mtype]); 1632 seq_putc(m, '\n'); 1548 seq_putc(m, '\n'); 1633 } 1549 } 1634 1550 1635 /* Print out the number of pageblocks for eac 1551 /* Print out the number of pageblocks for each migratetype */ 1636 static void pagetypeinfo_showblockcount(struc 1552 static void pagetypeinfo_showblockcount(struct seq_file *m, void *arg) 1637 { 1553 { 1638 int mtype; 1554 int mtype; 1639 pg_data_t *pgdat = (pg_data_t *)arg; 1555 pg_data_t *pgdat = (pg_data_t *)arg; 1640 1556 1641 seq_printf(m, "\n%-23s", "Number of b 1557 seq_printf(m, "\n%-23s", "Number of blocks type "); 1642 for (mtype = 0; mtype < MIGRATE_TYPES 1558 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) 1643 seq_printf(m, "%12s ", migrat 1559 seq_printf(m, "%12s ", migratetype_names[mtype]); 1644 seq_putc(m, '\n'); 1560 seq_putc(m, '\n'); 1645 walk_zones_in_node(m, pgdat, true, fa 1561 walk_zones_in_node(m, pgdat, true, false, 1646 pagetypeinfo_showblockcount_p 1562 pagetypeinfo_showblockcount_print); 1647 } 1563 } 1648 1564 1649 /* 1565 /* 1650 * Print out the number of pageblocks for eac 1566 * Print out the number of pageblocks for each migratetype that contain pages 1651 * of other types. This gives an indication o 1567 * of other types. This gives an indication of how well fallbacks are being 1652 * contained by rmqueue_fallback(). It requir 1568 * contained by rmqueue_fallback(). 
It requires information from PAGE_OWNER 1653 * to determine what is going on 1569 * to determine what is going on 1654 */ 1570 */ 1655 static void pagetypeinfo_showmixedcount(struc 1571 static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat) 1656 { 1572 { 1657 #ifdef CONFIG_PAGE_OWNER 1573 #ifdef CONFIG_PAGE_OWNER 1658 int mtype; 1574 int mtype; 1659 1575 1660 if (!static_branch_unlikely(&page_own 1576 if (!static_branch_unlikely(&page_owner_inited)) 1661 return; 1577 return; 1662 1578 1663 drain_all_pages(NULL); 1579 drain_all_pages(NULL); 1664 1580 1665 seq_printf(m, "\n%-23s", "Number of m 1581 seq_printf(m, "\n%-23s", "Number of mixed blocks "); 1666 for (mtype = 0; mtype < MIGRATE_TYPES 1582 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) 1667 seq_printf(m, "%12s ", migrat 1583 seq_printf(m, "%12s ", migratetype_names[mtype]); 1668 seq_putc(m, '\n'); 1584 seq_putc(m, '\n'); 1669 1585 1670 walk_zones_in_node(m, pgdat, true, tr 1586 walk_zones_in_node(m, pgdat, true, true, 1671 pagetypeinfo_showmixedcount_p 1587 pagetypeinfo_showmixedcount_print); 1672 #endif /* CONFIG_PAGE_OWNER */ 1588 #endif /* CONFIG_PAGE_OWNER */ 1673 } 1589 } 1674 1590 1675 /* 1591 /* 1676 * This prints out statistics in relation to 1592 * This prints out statistics in relation to grouping pages by mobility. 1677 * It is expensive to collect so do not const 1593 * It is expensive to collect so do not constantly read the file. 1678 */ 1594 */ 1679 static int pagetypeinfo_show(struct seq_file 1595 static int pagetypeinfo_show(struct seq_file *m, void *arg) 1680 { 1596 { 1681 pg_data_t *pgdat = (pg_data_t *)arg; 1597 pg_data_t *pgdat = (pg_data_t *)arg; 1682 1598 1683 /* check memoryless node */ 1599 /* check memoryless node */ 1684 if (!node_state(pgdat->node_id, N_MEM 1600 if (!node_state(pgdat->node_id, N_MEMORY)) 1685 return 0; 1601 return 0; 1686 1602 1687 seq_printf(m, "Page block order: %d\n 1603 seq_printf(m, "Page block order: %d\n", pageblock_order); 1688 seq_printf(m, "Pages per block: %lu\ 1604 seq_printf(m, "Pages per block: %lu\n", pageblock_nr_pages); 1689 seq_putc(m, '\n'); 1605 seq_putc(m, '\n'); 1690 pagetypeinfo_showfree(m, pgdat); 1606 pagetypeinfo_showfree(m, pgdat); 1691 pagetypeinfo_showblockcount(m, pgdat) 1607 pagetypeinfo_showblockcount(m, pgdat); 1692 pagetypeinfo_showmixedcount(m, pgdat) 1608 pagetypeinfo_showmixedcount(m, pgdat); 1693 1609 1694 return 0; 1610 return 0; 1695 } 1611 } 1696 1612 1697 static const struct seq_operations fragmentat 1613 static const struct seq_operations fragmentation_op = { 1698 .start = frag_start, 1614 .start = frag_start, 1699 .next = frag_next, 1615 .next = frag_next, 1700 .stop = frag_stop, 1616 .stop = frag_stop, 1701 .show = frag_show, 1617 .show = frag_show, 1702 }; 1618 }; 1703 1619 1704 static const struct seq_operations pagetypein 1620 static const struct seq_operations pagetypeinfo_op = { 1705 .start = frag_start, 1621 .start = frag_start, 1706 .next = frag_next, 1622 .next = frag_next, 1707 .stop = frag_stop, 1623 .stop = frag_stop, 1708 .show = pagetypeinfo_show, 1624 .show = pagetypeinfo_show, 1709 }; 1625 }; 1710 1626 1711 static bool is_zone_first_populated(pg_data_t 1627 static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone) 1712 { 1628 { 1713 int zid; 1629 int zid; 1714 1630 1715 for (zid = 0; zid < MAX_NR_ZONES; zid 1631 for (zid = 0; zid < MAX_NR_ZONES; zid++) { 1716 struct zone *compare = &pgdat 1632 struct zone *compare = &pgdat->node_zones[zid]; 1717 1633 1718 if (populated_zone(compare)) 1634 if 
(populated_zone(compare)) 1719 return zone == compar 1635 return zone == compare; 1720 } 1636 } 1721 1637 1722 return false; 1638 return false; 1723 } 1639 } 1724 1640 1725 static void zoneinfo_show_print(struct seq_fi 1641 static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, 1726 1642 struct zone *zone) 1727 { 1643 { 1728 int i; 1644 int i; 1729 seq_printf(m, "Node %d, zone %8s", pg 1645 seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name); 1730 if (is_zone_first_populated(pgdat, zo 1646 if (is_zone_first_populated(pgdat, zone)) { 1731 seq_printf(m, "\n per-node s 1647 seq_printf(m, "\n per-node stats"); 1732 for (i = 0; i < NR_VM_NODE_ST 1648 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { 1733 unsigned long pages = 1649 unsigned long pages = node_page_state_pages(pgdat, i); 1734 1650 1735 if (vmstat_item_print 1651 if (vmstat_item_print_in_thp(i)) 1736 pages /= HPAG 1652 pages /= HPAGE_PMD_NR; 1737 seq_printf(m, "\n 1653 seq_printf(m, "\n %-12s %lu", node_stat_name(i), 1738 pages); 1654 pages); 1739 } 1655 } 1740 } 1656 } 1741 seq_printf(m, 1657 seq_printf(m, 1742 "\n pages free %lu" 1658 "\n pages free %lu" 1743 "\n boost %lu" << 1744 "\n min %lu" 1659 "\n min %lu" 1745 "\n low %lu" 1660 "\n low %lu" 1746 "\n high %lu" 1661 "\n high %lu" 1747 "\n promo %lu" << 1748 "\n spanned %lu" 1662 "\n spanned %lu" 1749 "\n present %lu" 1663 "\n present %lu" 1750 "\n managed %lu" 1664 "\n managed %lu" 1751 "\n cma %lu", 1665 "\n cma %lu", 1752 zone_page_state(zone, NR_F 1666 zone_page_state(zone, NR_FREE_PAGES), 1753 zone->watermark_boost, << 1754 min_wmark_pages(zone), 1667 min_wmark_pages(zone), 1755 low_wmark_pages(zone), 1668 low_wmark_pages(zone), 1756 high_wmark_pages(zone), 1669 high_wmark_pages(zone), 1757 promo_wmark_pages(zone), << 1758 zone->spanned_pages, 1670 zone->spanned_pages, 1759 zone->present_pages, 1671 zone->present_pages, 1760 zone_managed_pages(zone), 1672 zone_managed_pages(zone), 1761 zone_cma_pages(zone)); 1673 zone_cma_pages(zone)); 1762 1674 1763 seq_printf(m, 1675 seq_printf(m, 1764 "\n protection: (%l 1676 "\n protection: (%ld", 1765 zone->lowmem_reserve[0]); 1677 zone->lowmem_reserve[0]); 1766 for (i = 1; i < ARRAY_SIZE(zone->lowm 1678 for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++) 1767 seq_printf(m, ", %ld", zone-> 1679 seq_printf(m, ", %ld", zone->lowmem_reserve[i]); 1768 seq_putc(m, ')'); 1680 seq_putc(m, ')'); 1769 1681 1770 /* If unpopulated, no other informati 1682 /* If unpopulated, no other information is useful */ 1771 if (!populated_zone(zone)) { 1683 if (!populated_zone(zone)) { 1772 seq_putc(m, '\n'); 1684 seq_putc(m, '\n'); 1773 return; 1685 return; 1774 } 1686 } 1775 1687 1776 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS 1688 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) 1777 seq_printf(m, "\n %-12s 1689 seq_printf(m, "\n %-12s %lu", zone_stat_name(i), 1778 zone_page_state(zo 1690 zone_page_state(zone, i)); 1779 1691 1780 #ifdef CONFIG_NUMA 1692 #ifdef CONFIG_NUMA 1781 for (i = 0; i < NR_VM_NUMA_EVENT_ITEM 1693 for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) 1782 seq_printf(m, "\n %-12s 1694 seq_printf(m, "\n %-12s %lu", numa_stat_name(i), 1783 zone_numa_event_st 1695 zone_numa_event_state(zone, i)); 1784 #endif 1696 #endif 1785 1697 1786 seq_printf(m, "\n pagesets"); 1698 seq_printf(m, "\n pagesets"); 1787 for_each_online_cpu(i) { 1699 for_each_online_cpu(i) { 1788 struct per_cpu_pages *pcp; 1700 struct per_cpu_pages *pcp; 1789 struct per_cpu_zonestat __may 1701 struct per_cpu_zonestat __maybe_unused *pzstats; 1790 1702 1791 pcp = 
per_cpu_ptr(zone->per_c 1703 pcp = per_cpu_ptr(zone->per_cpu_pageset, i); 1792 seq_printf(m, 1704 seq_printf(m, 1793 "\n cpu: %i" 1705 "\n cpu: %i" 1794 "\n c 1706 "\n count: %i" 1795 "\n h 1707 "\n high: %i" 1796 "\n b 1708 "\n batch: %i", 1797 i, 1709 i, 1798 pcp->count, 1710 pcp->count, 1799 pcp->high, 1711 pcp->high, 1800 pcp->batch); 1712 pcp->batch); 1801 #ifdef CONFIG_SMP 1713 #ifdef CONFIG_SMP 1802 pzstats = per_cpu_ptr(zone->p 1714 pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i); 1803 seq_printf(m, "\n vm stats t 1715 seq_printf(m, "\n vm stats threshold: %d", 1804 pzstats->stat 1716 pzstats->stat_threshold); 1805 #endif 1717 #endif 1806 } 1718 } 1807 seq_printf(m, 1719 seq_printf(m, 1808 "\n node_unreclaimable: 1720 "\n node_unreclaimable: %u" 1809 "\n start_pfn: 1721 "\n start_pfn: %lu", 1810 pgdat->kswapd_failures >= 1722 pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES, 1811 zone->zone_start_pfn); 1723 zone->zone_start_pfn); 1812 seq_putc(m, '\n'); 1724 seq_putc(m, '\n'); 1813 } 1725 } 1814 1726 1815 /* 1727 /* 1816 * Output information about zones in @pgdat. 1728 * Output information about zones in @pgdat. All zones are printed regardless 1817 * of whether they are populated or not: lowm 1729 * of whether they are populated or not: lowmem_reserve_ratio operates on the 1818 * set of all zones and userspace would not b 1730 * set of all zones and userspace would not be aware of such zones if they are 1819 * suppressed here (zoneinfo displays the eff 1731 * suppressed here (zoneinfo displays the effect of lowmem_reserve_ratio). 1820 */ 1732 */ 1821 static int zoneinfo_show(struct seq_file *m, 1733 static int zoneinfo_show(struct seq_file *m, void *arg) 1822 { 1734 { 1823 pg_data_t *pgdat = (pg_data_t *)arg; 1735 pg_data_t *pgdat = (pg_data_t *)arg; 1824 walk_zones_in_node(m, pgdat, false, f 1736 walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print); 1825 return 0; 1737 return 0; 1826 } 1738 } 1827 1739 1828 static const struct seq_operations zoneinfo_o 1740 static const struct seq_operations zoneinfo_op = { 1829 .start = frag_start, /* iterate over 1741 .start = frag_start, /* iterate over all zones. The same as in 1830 * fragmentatio 1742 * fragmentation. */ 1831 .next = frag_next, 1743 .next = frag_next, 1832 .stop = frag_stop, 1744 .stop = frag_stop, 1833 .show = zoneinfo_show, 1745 .show = zoneinfo_show, 1834 }; 1746 }; 1835 1747 1836 #define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEM 1748 #define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \ 1837 NR_VM_NUMA_EVENT_ITE 1749 NR_VM_NUMA_EVENT_ITEMS + \ 1838 NR_VM_NODE_STAT_ITEM 1750 NR_VM_NODE_STAT_ITEMS + \ 1839 NR_VM_STAT_ITEMS + \ !! 1751 NR_VM_WRITEBACK_STAT_ITEMS + \ 1840 (IS_ENABLED(CONFIG_V 1752 (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? 
\ 1841 NR_VM_EVENT_ITEMS : 1753 NR_VM_EVENT_ITEMS : 0)) 1842 1754 1843 static void *vmstat_start(struct seq_file *m, 1755 static void *vmstat_start(struct seq_file *m, loff_t *pos) 1844 { 1756 { 1845 unsigned long *v; 1757 unsigned long *v; 1846 int i; 1758 int i; 1847 1759 1848 if (*pos >= NR_VMSTAT_ITEMS) 1760 if (*pos >= NR_VMSTAT_ITEMS) 1849 return NULL; 1761 return NULL; 1850 1762 1851 BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) 1763 BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS); 1852 fold_vm_numa_events(); 1764 fold_vm_numa_events(); 1853 v = kmalloc_array(NR_VMSTAT_ITEMS, si 1765 v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL); 1854 m->private = v; 1766 m->private = v; 1855 if (!v) 1767 if (!v) 1856 return ERR_PTR(-ENOMEM); 1768 return ERR_PTR(-ENOMEM); 1857 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS 1769 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) 1858 v[i] = global_zone_page_state 1770 v[i] = global_zone_page_state(i); 1859 v += NR_VM_ZONE_STAT_ITEMS; 1771 v += NR_VM_ZONE_STAT_ITEMS; 1860 1772 1861 #ifdef CONFIG_NUMA 1773 #ifdef CONFIG_NUMA 1862 for (i = 0; i < NR_VM_NUMA_EVENT_ITEM 1774 for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) 1863 v[i] = global_numa_event_stat 1775 v[i] = global_numa_event_state(i); 1864 v += NR_VM_NUMA_EVENT_ITEMS; 1776 v += NR_VM_NUMA_EVENT_ITEMS; 1865 #endif 1777 #endif 1866 1778 1867 for (i = 0; i < NR_VM_NODE_STAT_ITEMS 1779 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { 1868 v[i] = global_node_page_state 1780 v[i] = global_node_page_state_pages(i); 1869 if (vmstat_item_print_in_thp( 1781 if (vmstat_item_print_in_thp(i)) 1870 v[i] /= HPAGE_PMD_NR; 1782 v[i] /= HPAGE_PMD_NR; 1871 } 1783 } 1872 v += NR_VM_NODE_STAT_ITEMS; 1784 v += NR_VM_NODE_STAT_ITEMS; 1873 1785 1874 global_dirty_limits(v + NR_DIRTY_BG_T 1786 global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD, 1875 v + NR_DIRTY_THRE 1787 v + NR_DIRTY_THRESHOLD); 1876 v[NR_MEMMAP_PAGES] = atomic_long_read !! 
1788 v += NR_VM_WRITEBACK_STAT_ITEMS; 1877 v[NR_MEMMAP_BOOT_PAGES] = atomic_long << 1878 v += NR_VM_STAT_ITEMS; << 1879 1789 1880 #ifdef CONFIG_VM_EVENT_COUNTERS 1790 #ifdef CONFIG_VM_EVENT_COUNTERS 1881 all_vm_events(v); 1791 all_vm_events(v); 1882 v[PGPGIN] /= 2; /* sectors -> 1792 v[PGPGIN] /= 2; /* sectors -> kbytes */ 1883 v[PGPGOUT] /= 2; 1793 v[PGPGOUT] /= 2; 1884 #endif 1794 #endif 1885 return (unsigned long *)m->private + 1795 return (unsigned long *)m->private + *pos; 1886 } 1796 } 1887 1797 1888 static void *vmstat_next(struct seq_file *m, 1798 static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos) 1889 { 1799 { 1890 (*pos)++; 1800 (*pos)++; 1891 if (*pos >= NR_VMSTAT_ITEMS) 1801 if (*pos >= NR_VMSTAT_ITEMS) 1892 return NULL; 1802 return NULL; 1893 return (unsigned long *)m->private + 1803 return (unsigned long *)m->private + *pos; 1894 } 1804 } 1895 1805 1896 static int vmstat_show(struct seq_file *m, vo 1806 static int vmstat_show(struct seq_file *m, void *arg) 1897 { 1807 { 1898 unsigned long *l = arg; 1808 unsigned long *l = arg; 1899 unsigned long off = l - (unsigned lon 1809 unsigned long off = l - (unsigned long *)m->private; 1900 1810 1901 seq_puts(m, vmstat_text[off]); 1811 seq_puts(m, vmstat_text[off]); 1902 seq_put_decimal_ull(m, " ", *l); 1812 seq_put_decimal_ull(m, " ", *l); 1903 seq_putc(m, '\n'); 1813 seq_putc(m, '\n'); 1904 1814 1905 if (off == NR_VMSTAT_ITEMS - 1) { 1815 if (off == NR_VMSTAT_ITEMS - 1) { 1906 /* 1816 /* 1907 * We've come to the end - ad 1817 * We've come to the end - add any deprecated counters to avoid 1908 * breaking userspace which m 1818 * breaking userspace which might depend on them being present. 1909 */ 1819 */ 1910 seq_puts(m, "nr_unstable 0\n" 1820 seq_puts(m, "nr_unstable 0\n"); 1911 } 1821 } 1912 return 0; 1822 return 0; 1913 } 1823 } 1914 1824 1915 static void vmstat_stop(struct seq_file *m, v 1825 static void vmstat_stop(struct seq_file *m, void *arg) 1916 { 1826 { 1917 kfree(m->private); 1827 kfree(m->private); 1918 m->private = NULL; 1828 m->private = NULL; 1919 } 1829 } 1920 1830 1921 static const struct seq_operations vmstat_op 1831 static const struct seq_operations vmstat_op = { 1922 .start = vmstat_start, 1832 .start = vmstat_start, 1923 .next = vmstat_next, 1833 .next = vmstat_next, 1924 .stop = vmstat_stop, 1834 .stop = vmstat_stop, 1925 .show = vmstat_show, 1835 .show = vmstat_show, 1926 }; 1836 }; 1927 #endif /* CONFIG_PROC_FS */ 1837 #endif /* CONFIG_PROC_FS */ 1928 1838 1929 #ifdef CONFIG_SMP 1839 #ifdef CONFIG_SMP 1930 static DEFINE_PER_CPU(struct delayed_work, vm 1840 static DEFINE_PER_CPU(struct delayed_work, vmstat_work); 1931 int sysctl_stat_interval __read_mostly = HZ; 1841 int sysctl_stat_interval __read_mostly = HZ; 1932 1842 1933 #ifdef CONFIG_PROC_FS 1843 #ifdef CONFIG_PROC_FS 1934 static void refresh_vm_stats(struct work_stru 1844 static void refresh_vm_stats(struct work_struct *work) 1935 { 1845 { 1936 refresh_cpu_vm_stats(true); 1846 refresh_cpu_vm_stats(true); 1937 } 1847 } 1938 1848 1939 int vmstat_refresh(const struct ctl_table *ta !! 
1849 int vmstat_refresh(struct ctl_table *table, int write, 1940 void *buffer, size_t *lenp 1850 void *buffer, size_t *lenp, loff_t *ppos) 1941 { 1851 { 1942 long val; 1852 long val; 1943 int err; 1853 int err; 1944 int i; 1854 int i; 1945 1855 1946 /* 1856 /* 1947 * The regular update, every sysctl_s 1857 * The regular update, every sysctl_stat_interval, may come later 1948 * than expected: leaving a significa 1858 * than expected: leaving a significant amount in per_cpu buckets. 1949 * This is particularly misleading wh 1859 * This is particularly misleading when checking a quantity of HUGE 1950 * pages, immediately after running a 1860 * pages, immediately after running a test. /proc/sys/vm/stat_refresh, 1951 * which can equally be echo'ed to or 1861 * which can equally be echo'ed to or cat'ted from (by root), 1952 * can be used to update the stats ju 1862 * can be used to update the stats just before reading them. 1953 * 1863 * 1954 * Oh, and since global_zone_page_sta 1864 * Oh, and since global_zone_page_state() etc. are so careful to hide 1955 * transiently negative values, repor 1865 * transiently negative values, report an error here if any of 1956 * the stats is negative, so we know 1866 * the stats is negative, so we know to go looking for imbalance. 1957 */ 1867 */ 1958 err = schedule_on_each_cpu(refresh_vm 1868 err = schedule_on_each_cpu(refresh_vm_stats); 1959 if (err) 1869 if (err) 1960 return err; 1870 return err; 1961 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS 1871 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) { 1962 /* 1872 /* 1963 * Skip checking stats known 1873 * Skip checking stats known to go negative occasionally. 1964 */ 1874 */ 1965 switch (i) { 1875 switch (i) { 1966 case NR_ZONE_WRITE_PENDING: 1876 case NR_ZONE_WRITE_PENDING: 1967 case NR_FREE_CMA_PAGES: 1877 case NR_FREE_CMA_PAGES: 1968 continue; 1878 continue; 1969 } 1879 } 1970 val = atomic_long_read(&vm_zo 1880 val = atomic_long_read(&vm_zone_stat[i]); 1971 if (val < 0) { 1881 if (val < 0) { 1972 pr_warn("%s: %s %ld\n 1882 pr_warn("%s: %s %ld\n", 1973 __func__, zon 1883 __func__, zone_stat_name(i), val); 1974 } 1884 } 1975 } 1885 } 1976 for (i = 0; i < NR_VM_NODE_STAT_ITEMS 1886 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { 1977 /* 1887 /* 1978 * Skip checking stats known 1888 * Skip checking stats known to go negative occasionally. 1979 */ 1889 */ 1980 switch (i) { 1890 switch (i) { 1981 case NR_WRITEBACK: 1891 case NR_WRITEBACK: 1982 continue; 1892 continue; 1983 } 1893 } 1984 val = atomic_long_read(&vm_no 1894 val = atomic_long_read(&vm_node_stat[i]); 1985 if (val < 0) { 1895 if (val < 0) { 1986 pr_warn("%s: %s %ld\n 1896 pr_warn("%s: %s %ld\n", 1987 __func__, nod 1897 __func__, node_stat_name(i), val); 1988 } 1898 } 1989 } 1899 } 1990 if (write) 1900 if (write) 1991 *ppos += *lenp; 1901 *ppos += *lenp; 1992 else 1902 else 1993 *lenp = 0; 1903 *lenp = 0; 1994 return 0; 1904 return 0; 1995 } 1905 } 1996 #endif /* CONFIG_PROC_FS */ 1906 #endif /* CONFIG_PROC_FS */ 1997 1907 1998 static void vmstat_update(struct work_struct 1908 static void vmstat_update(struct work_struct *w) 1999 { 1909 { 2000 if (refresh_cpu_vm_stats(true)) { 1910 if (refresh_cpu_vm_stats(true)) { 2001 /* 1911 /* 2002 * Counters were updated so w 1912 * Counters were updated so we expect more updates 2003 * to occur in the future. Ke 1913 * to occur in the future. Keep on running the 2004 * update worker thread. 1914 * update worker thread. 
2005 */ 1915 */ 2006 queue_delayed_work_on(smp_pro 1916 queue_delayed_work_on(smp_processor_id(), mm_percpu_wq, 2007 this_cpu_ptr( 1917 this_cpu_ptr(&vmstat_work), 2008 round_jiffies 1918 round_jiffies_relative(sysctl_stat_interval)); 2009 } 1919 } 2010 } 1920 } 2011 1921 2012 /* 1922 /* 2013 * Check if the diffs for a certain cpu indic 1923 * Check if the diffs for a certain cpu indicate that 2014 * an update is needed. 1924 * an update is needed. 2015 */ 1925 */ 2016 static bool need_update(int cpu) 1926 static bool need_update(int cpu) 2017 { 1927 { 2018 pg_data_t *last_pgdat = NULL; 1928 pg_data_t *last_pgdat = NULL; 2019 struct zone *zone; 1929 struct zone *zone; 2020 1930 2021 for_each_populated_zone(zone) { 1931 for_each_populated_zone(zone) { 2022 struct per_cpu_zonestat *pzst 1932 struct per_cpu_zonestat *pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu); 2023 struct per_cpu_nodestat *n; 1933 struct per_cpu_nodestat *n; 2024 1934 2025 /* 1935 /* 2026 * The fast way of checking i 1936 * The fast way of checking if there are any vmstat diffs. 2027 */ 1937 */ 2028 if (memchr_inv(pzstats->vm_st 1938 if (memchr_inv(pzstats->vm_stat_diff, 0, sizeof(pzstats->vm_stat_diff))) 2029 return true; 1939 return true; 2030 1940 2031 if (last_pgdat == zone->zone_ 1941 if (last_pgdat == zone->zone_pgdat) 2032 continue; 1942 continue; 2033 last_pgdat = zone->zone_pgdat 1943 last_pgdat = zone->zone_pgdat; 2034 n = per_cpu_ptr(zone->zone_pg 1944 n = per_cpu_ptr(zone->zone_pgdat->per_cpu_nodestats, cpu); 2035 if (memchr_inv(n->vm_node_sta 1945 if (memchr_inv(n->vm_node_stat_diff, 0, sizeof(n->vm_node_stat_diff))) 2036 return true; 1946 return true; 2037 } 1947 } 2038 return false; 1948 return false; 2039 } 1949 } 2040 1950 2041 /* 1951 /* 2042 * Switch off vmstat processing and then fold 1952 * Switch off vmstat processing and then fold all the remaining differentials 2043 * until the diffs stay at zero. The function 1953 * until the diffs stay at zero. The function is used by NOHZ and can only be 2044 * invoked when tick processing is not active 1954 * invoked when tick processing is not active. 2045 */ 1955 */ 2046 void quiet_vmstat(void) 1956 void quiet_vmstat(void) 2047 { 1957 { 2048 if (system_state != SYSTEM_RUNNING) 1958 if (system_state != SYSTEM_RUNNING) 2049 return; 1959 return; 2050 1960 2051 if (!delayed_work_pending(this_cpu_pt 1961 if (!delayed_work_pending(this_cpu_ptr(&vmstat_work))) 2052 return; 1962 return; 2053 1963 2054 if (!need_update(smp_processor_id())) 1964 if (!need_update(smp_processor_id())) 2055 return; 1965 return; 2056 1966 2057 /* 1967 /* 2058 * Just refresh counters and do not c 1968 * Just refresh counters and do not care about the pending delayed 2059 * vmstat_update. It doesn't fire tha 1969 * vmstat_update. It doesn't fire that often to matter and canceling 2060 * it would be too expensive from thi 1970 * it would be too expensive from this path. 2061 * vmstat_shepherd will take care abo 1971 * vmstat_shepherd will take care about that for us. 2062 */ 1972 */ 2063 refresh_cpu_vm_stats(false); 1973 refresh_cpu_vm_stats(false); 2064 } 1974 } 2065 1975 2066 /* 1976 /* 2067 * Shepherd worker thread that checks the 1977 * Shepherd worker thread that checks the 2068 * differentials of processors that have thei 1978 * differentials of processors that have their worker 2069 * threads for vm statistics updates disabled 1979 * threads for vm statistics updates disabled because of 2070 * inactivity. 1980 * inactivity. 
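 *
 * (Illustrative note added here: the shepherd decides whether a CPU is
 * worth poking via need_update() above, which leans on
 * memchr_inv(buf, 0, size) - that helper returns the address of the
 * first byte differing from 0, or NULL when the whole buffer is zero.
 * A hypothetical vm_stat_diff[] of {0, 0, 3, 0} therefore makes
 * need_update() report true after a single scan, while an all-zero
 * array yields NULL and the CPU is skipped without summing any
 * individual deltas.)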
2071 */ 1981 */ 2072 static void vmstat_shepherd(struct work_struc 1982 static void vmstat_shepherd(struct work_struct *w); 2073 1983 2074 static DECLARE_DEFERRABLE_WORK(shepherd, vmst 1984 static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd); 2075 1985 2076 static void vmstat_shepherd(struct work_struc 1986 static void vmstat_shepherd(struct work_struct *w) 2077 { 1987 { 2078 int cpu; 1988 int cpu; 2079 1989 2080 cpus_read_lock(); 1990 cpus_read_lock(); 2081 /* Check processors whose vmstat work 1991 /* Check processors whose vmstat worker threads have been disabled */ 2082 for_each_online_cpu(cpu) { 1992 for_each_online_cpu(cpu) { 2083 struct delayed_work *dw = &pe 1993 struct delayed_work *dw = &per_cpu(vmstat_work, cpu); 2084 1994 2085 /* << 2086 * In kernel users of vmstat << 2087 * they are using zone_page_s << 2088 * an imprecision as the regu << 2089 * cumulative error can grow << 2090 * << 2091 * From that POV the regular << 2092 * been isolated from the ker << 2093 * infrastructure ever notici << 2094 * for all isolated CPUs to a << 2095 */ << 2096 if (cpu_is_isolated(cpu)) << 2097 continue; << 2098 << 2099 if (!delayed_work_pending(dw) 1995 if (!delayed_work_pending(dw) && need_update(cpu)) 2100 queue_delayed_work_on 1996 queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0); 2101 1997 2102 cond_resched(); 1998 cond_resched(); 2103 } 1999 } 2104 cpus_read_unlock(); 2000 cpus_read_unlock(); 2105 2001 2106 schedule_delayed_work(&shepherd, 2002 schedule_delayed_work(&shepherd, 2107 round_jiffies_relative(sysctl 2003 round_jiffies_relative(sysctl_stat_interval)); 2108 } 2004 } 2109 2005 2110 static void __init start_shepherd_timer(void) 2006 static void __init start_shepherd_timer(void) 2111 { 2007 { 2112 int cpu; 2008 int cpu; 2113 2009 2114 for_each_possible_cpu(cpu) 2010 for_each_possible_cpu(cpu) 2115 INIT_DEFERRABLE_WORK(per_cpu_ 2011 INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu), 2116 vmstat_update); 2012 vmstat_update); 2117 2013 2118 schedule_delayed_work(&shepherd, 2014 schedule_delayed_work(&shepherd, 2119 round_jiffies_relative(sysctl 2015 round_jiffies_relative(sysctl_stat_interval)); 2120 } 2016 } 2121 2017 2122 static void __init init_cpu_node_state(void) 2018 static void __init init_cpu_node_state(void) 2123 { 2019 { 2124 int node; 2020 int node; 2125 2021 2126 for_each_online_node(node) { 2022 for_each_online_node(node) { 2127 if (!cpumask_empty(cpumask_of !! 2023 if (cpumask_weight(cpumask_of_node(node)) > 0) 2128 node_set_state(node, 2024 node_set_state(node, N_CPU); 2129 } 2025 } 2130 } 2026 } 2131 2027 2132 static int vmstat_cpu_online(unsigned int cpu 2028 static int vmstat_cpu_online(unsigned int cpu) 2133 { 2029 { 2134 refresh_zone_stat_thresholds(); 2030 refresh_zone_stat_thresholds(); 2135 !! 
2031 node_set_state(cpu_to_node(cpu), N_CPU); 2136 if (!node_state(cpu_to_node(cpu), N_C << 2137 node_set_state(cpu_to_node(cp << 2138 } << 2139 << 2140 return 0; 2032 return 0; 2141 } 2033 } 2142 2034 2143 static int vmstat_cpu_down_prep(unsigned int 2035 static int vmstat_cpu_down_prep(unsigned int cpu) 2144 { 2036 { 2145 cancel_delayed_work_sync(&per_cpu(vms 2037 cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu)); 2146 return 0; 2038 return 0; 2147 } 2039 } 2148 2040 2149 static int vmstat_cpu_dead(unsigned int cpu) 2041 static int vmstat_cpu_dead(unsigned int cpu) 2150 { 2042 { 2151 const struct cpumask *node_cpus; 2043 const struct cpumask *node_cpus; 2152 int node; 2044 int node; 2153 2045 2154 node = cpu_to_node(cpu); 2046 node = cpu_to_node(cpu); 2155 2047 2156 refresh_zone_stat_thresholds(); 2048 refresh_zone_stat_thresholds(); 2157 node_cpus = cpumask_of_node(node); 2049 node_cpus = cpumask_of_node(node); 2158 if (!cpumask_empty(node_cpus)) !! 2050 if (cpumask_weight(node_cpus) > 0) 2159 return 0; 2051 return 0; 2160 2052 2161 node_clear_state(node, N_CPU); 2053 node_clear_state(node, N_CPU); 2162 << 2163 return 0; 2054 return 0; 2164 } 2055 } 2165 2056 2166 #endif 2057 #endif 2167 2058 2168 struct workqueue_struct *mm_percpu_wq; 2059 struct workqueue_struct *mm_percpu_wq; 2169 2060 2170 void __init init_mm_internals(void) 2061 void __init init_mm_internals(void) 2171 { 2062 { 2172 int ret __maybe_unused; 2063 int ret __maybe_unused; 2173 2064 2174 mm_percpu_wq = alloc_workqueue("mm_pe 2065 mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0); 2175 2066 2176 #ifdef CONFIG_SMP 2067 #ifdef CONFIG_SMP 2177 ret = cpuhp_setup_state_nocalls(CPUHP 2068 ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead", 2178 NULL, 2069 NULL, vmstat_cpu_dead); 2179 if (ret < 0) 2070 if (ret < 0) 2180 pr_err("vmstat: failed to reg 2071 pr_err("vmstat: failed to register 'dead' hotplug state\n"); 2181 2072 2182 ret = cpuhp_setup_state_nocalls(CPUHP 2073 ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online", 2183 vmsta 2074 vmstat_cpu_online, 2184 vmsta 2075 vmstat_cpu_down_prep); 2185 if (ret < 0) 2076 if (ret < 0) 2186 pr_err("vmstat: failed to reg 2077 pr_err("vmstat: failed to register 'online' hotplug state\n"); 2187 2078 2188 cpus_read_lock(); 2079 cpus_read_lock(); 2189 init_cpu_node_state(); 2080 init_cpu_node_state(); 2190 cpus_read_unlock(); 2081 cpus_read_unlock(); 2191 2082 2192 start_shepherd_timer(); 2083 start_shepherd_timer(); 2193 #endif 2084 #endif 2194 #ifdef CONFIG_PROC_FS 2085 #ifdef CONFIG_PROC_FS 2195 proc_create_seq("buddyinfo", 0444, NU 2086 proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op); 2196 proc_create_seq("pagetypeinfo", 0400, 2087 proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op); 2197 proc_create_seq("vmstat", 0444, NULL, 2088 proc_create_seq("vmstat", 0444, NULL, &vmstat_op); 2198 proc_create_seq("zoneinfo", 0444, NUL 2089 proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op); 2199 #endif 2090 #endif 2200 } 2091 } 2201 2092 2202 #if defined(CONFIG_DEBUG_FS) && defined(CONFI 2093 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION) 2203 2094 2204 /* 2095 /* 2205 * Return an index indicating how much of the 2096 * Return an index indicating how much of the available free memory is 2206 * unusable for an allocation of the requeste 2097 * unusable for an allocation of the requested size. 
2207 */ 2098 */ 2208 static int unusable_free_index(unsigned int o 2099 static int unusable_free_index(unsigned int order, 2209 struct contig 2100 struct contig_page_info *info) 2210 { 2101 { 2211 /* No free memory is interpreted as a 2102 /* No free memory is interpreted as all free memory is unusable */ 2212 if (info->free_pages == 0) 2103 if (info->free_pages == 0) 2213 return 1000; 2104 return 1000; 2214 2105 2215 /* 2106 /* 2216 * Index should be a value between 0 2107 * Index should be a value between 0 and 1. Return a value to 3 2217 * decimal places. 2108 * decimal places. 2218 * 2109 * 2219 * 0 => no fragmentation 2110 * 0 => no fragmentation 2220 * 1 => high fragmentation 2111 * 1 => high fragmentation 2221 */ 2112 */ 2222 return div_u64((info->free_pages - (i 2113 return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages); 2223 2114 2224 } 2115 } 2225 2116 2226 static void unusable_show_print(struct seq_fi 2117 static void unusable_show_print(struct seq_file *m, 2227 pg_da 2118 pg_data_t *pgdat, struct zone *zone) 2228 { 2119 { 2229 unsigned int order; 2120 unsigned int order; 2230 int index; 2121 int index; 2231 struct contig_page_info info; 2122 struct contig_page_info info; 2232 2123 2233 seq_printf(m, "Node %d, zone %8s ", 2124 seq_printf(m, "Node %d, zone %8s ", 2234 pgdat->node_i 2125 pgdat->node_id, 2235 zone->name); 2126 zone->name); 2236 for (order = 0; order < NR_PAGE_ORDER !! 2127 for (order = 0; order < MAX_ORDER; ++order) { 2237 fill_contig_page_info(zone, o 2128 fill_contig_page_info(zone, order, &info); 2238 index = unusable_free_index(o 2129 index = unusable_free_index(order, &info); 2239 seq_printf(m, "%d.%03d ", ind 2130 seq_printf(m, "%d.%03d ", index / 1000, index % 1000); 2240 } 2131 } 2241 2132 2242 seq_putc(m, '\n'); 2133 seq_putc(m, '\n'); 2243 } 2134 } 2244 2135 2245 /* 2136 /* 2246 * Display unusable free space index 2137 * Display unusable free space index 2247 * 2138 * 2248 * The unusable free space index measures how 2139 * The unusable free space index measures how much of the available free 2249 * memory cannot be used to satisfy an alloca 2140 * memory cannot be used to satisfy an allocation of a given size and is a 2250 * value between 0 and 1. The higher the valu 2141 * value between 0 and 1. The higher the value, the more of free memory is 2251 * unusable and by implication, the worse the 2142 * unusable and by implication, the worse the external fragmentation is. This 2252 * can be expressed as a percentage by multip 2143 * can be expressed as a percentage by multiplying by 100. 
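 *
 * Worked example with made-up numbers (an illustration added here): for
 * an order-4 request (16 pages), with info.free_pages = 2048 of which
 * info.free_blocks_suitable << 4 = 512 pages sit in blocks of at least
 * 16 pages, unusable_free_index() returns
 * (2048 - 512) * 1000 / 2048 = 750, which unusable_show_print() above
 * prints as "0.750": three quarters of the free memory cannot serve
 * that allocation.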
2253 */ 2144 */ 2254 static int unusable_show(struct seq_file *m, 2145 static int unusable_show(struct seq_file *m, void *arg) 2255 { 2146 { 2256 pg_data_t *pgdat = (pg_data_t *)arg; 2147 pg_data_t *pgdat = (pg_data_t *)arg; 2257 2148 2258 /* check memoryless node */ 2149 /* check memoryless node */ 2259 if (!node_state(pgdat->node_id, N_MEM 2150 if (!node_state(pgdat->node_id, N_MEMORY)) 2260 return 0; 2151 return 0; 2261 2152 2262 walk_zones_in_node(m, pgdat, true, fa 2153 walk_zones_in_node(m, pgdat, true, false, unusable_show_print); 2263 2154 2264 return 0; 2155 return 0; 2265 } 2156 } 2266 2157 2267 static const struct seq_operations unusable_s 2158 static const struct seq_operations unusable_sops = { 2268 .start = frag_start, 2159 .start = frag_start, 2269 .next = frag_next, 2160 .next = frag_next, 2270 .stop = frag_stop, 2161 .stop = frag_stop, 2271 .show = unusable_show, 2162 .show = unusable_show, 2272 }; 2163 }; 2273 2164 2274 DEFINE_SEQ_ATTRIBUTE(unusable); 2165 DEFINE_SEQ_ATTRIBUTE(unusable); 2275 2166 2276 static void extfrag_show_print(struct seq_fil 2167 static void extfrag_show_print(struct seq_file *m, 2277 pg_da 2168 pg_data_t *pgdat, struct zone *zone) 2278 { 2169 { 2279 unsigned int order; 2170 unsigned int order; 2280 int index; 2171 int index; 2281 2172 2282 /* Alloc on stack as interrupts are d 2173 /* Alloc on stack as interrupts are disabled for zone walk */ 2283 struct contig_page_info info; 2174 struct contig_page_info info; 2284 2175 2285 seq_printf(m, "Node %d, zone %8s ", 2176 seq_printf(m, "Node %d, zone %8s ", 2286 pgdat->node_i 2177 pgdat->node_id, 2287 zone->name); 2178 zone->name); 2288 for (order = 0; order < NR_PAGE_ORDER !! 2179 for (order = 0; order < MAX_ORDER; ++order) { 2289 fill_contig_page_info(zone, o 2180 fill_contig_page_info(zone, order, &info); 2290 index = __fragmentation_index 2181 index = __fragmentation_index(order, &info); 2291 seq_printf(m, "%2d.%03d ", in !! 
2182 seq_printf(m, "%d.%03d ", index / 1000, index % 1000); 2292 } 2183 } 2293 2184 2294 seq_putc(m, '\n'); 2185 seq_putc(m, '\n'); 2295 } 2186 } 2296 2187 2297 /* 2188 /* 2298 * Display fragmentation index for orders tha 2189 * Display fragmentation index for orders that allocations would fail for 2299 */ 2190 */ 2300 static int extfrag_show(struct seq_file *m, v 2191 static int extfrag_show(struct seq_file *m, void *arg) 2301 { 2192 { 2302 pg_data_t *pgdat = (pg_data_t *)arg; 2193 pg_data_t *pgdat = (pg_data_t *)arg; 2303 2194 2304 walk_zones_in_node(m, pgdat, true, fa 2195 walk_zones_in_node(m, pgdat, true, false, extfrag_show_print); 2305 2196 2306 return 0; 2197 return 0; 2307 } 2198 } 2308 2199 2309 static const struct seq_operations extfrag_so 2200 static const struct seq_operations extfrag_sops = { 2310 .start = frag_start, 2201 .start = frag_start, 2311 .next = frag_next, 2202 .next = frag_next, 2312 .stop = frag_stop, 2203 .stop = frag_stop, 2313 .show = extfrag_show, 2204 .show = extfrag_show, 2314 }; 2205 }; 2315 2206 2316 DEFINE_SEQ_ATTRIBUTE(extfrag); 2207 DEFINE_SEQ_ATTRIBUTE(extfrag); 2317 2208 2318 static int __init extfrag_debug_init(void) 2209 static int __init extfrag_debug_init(void) 2319 { 2210 { 2320 struct dentry *extfrag_debug_root; 2211 struct dentry *extfrag_debug_root; 2321 2212 2322 extfrag_debug_root = debugfs_create_d 2213 extfrag_debug_root = debugfs_create_dir("extfrag", NULL); 2323 2214 2324 debugfs_create_file("unusable_index", 2215 debugfs_create_file("unusable_index", 0444, extfrag_debug_root, NULL, 2325 &unusable_fops); 2216 &unusable_fops); 2326 2217 2327 debugfs_create_file("extfrag_index", 2218 debugfs_create_file("extfrag_index", 0444, extfrag_debug_root, NULL, 2328 &extfrag_fops); 2219 &extfrag_fops); 2329 2220 2330 return 0; 2221 return 0; 2331 } 2222 } 2332 2223 2333 module_init(extfrag_debug_init); 2224 module_init(extfrag_debug_init); 2334 << 2335 #endif 2225 #endif 2336 2226
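To exercise the interfaces this file registers, here is a stand-alone user-space sketch (not part of vmstat.c, added purely for illustration): it prints a few counters from /proc/vmstat and, when debugfs is mounted at the conventional /sys/kernel/debug and the kernel was built with CONFIG_DEBUG_FS and CONFIG_COMPACTION, dumps the extfrag_index file created by extfrag_debug_init() above. Reading the debugfs file typically requires root; the file name vmstat_peek.c is hypothetical.

/* build with: cc -Wall -o vmstat_peek vmstat_peek.c */
#include <stdio.h>
#include <string.h>

/* Dump a whole pseudo-file, line by line. */
static void dump_file(const char *path)
{
	char line[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
}

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/vmstat", "r");

	if (!f) {
		perror("/proc/vmstat");
		return 1;
	}
	/* Pick out a few of the counters named in vmstat_text[] above. */
	while (fgets(line, sizeof(line), f)) {
		if (!strncmp(line, "nr_free_pages ", 14) ||
		    !strncmp(line, "pgfault ", 8) ||
		    !strncmp(line, "compact_stall ", 14))
			fputs(line, stdout);
	}
	fclose(f);

	/* Created by extfrag_debug_init(); needs debugfs mounted and root. */
	dump_file("/sys/kernel/debug/extfrag/extfrag_index");
	return 0;
}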