TOMOYO Linux Cross Reference
Linux/mm/vmstat.c

Diff markup

Differences between /mm/vmstat.c (Version linux-6.12-rc7) and /mm/vmstat.c (Version linux-5.8.18)


  1 // SPDX-License-Identifier: GPL-2.0-only            1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*                                                  2 /*
  3  *  linux/mm/vmstat.c                               3  *  linux/mm/vmstat.c
  4  *                                                  4  *
  5  *  Manages VM statistics                           5  *  Manages VM statistics
  6  *  Copyright (C) 1991, 1992, 1993, 1994  Linu      6  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
  7  *                                                  7  *
  8  *  zoned VM statistics                             8  *  zoned VM statistics
  9  *  Copyright (C) 2006 Silicon Graphics, Inc.,      9  *  Copyright (C) 2006 Silicon Graphics, Inc.,
 10  *              Christoph Lameter <christoph@l     10  *              Christoph Lameter <christoph@lameter.com>
 11  *  Copyright (C) 2008-2014 Christoph Lameter      11  *  Copyright (C) 2008-2014 Christoph Lameter
 12  */                                                12  */
 13 #include <linux/fs.h>                              13 #include <linux/fs.h>
 14 #include <linux/mm.h>                              14 #include <linux/mm.h>
 15 #include <linux/err.h>                             15 #include <linux/err.h>
 16 #include <linux/module.h>                          16 #include <linux/module.h>
 17 #include <linux/slab.h>                            17 #include <linux/slab.h>
 18 #include <linux/cpu.h>                             18 #include <linux/cpu.h>
 19 #include <linux/cpumask.h>                         19 #include <linux/cpumask.h>
 20 #include <linux/vmstat.h>                          20 #include <linux/vmstat.h>
 21 #include <linux/proc_fs.h>                         21 #include <linux/proc_fs.h>
 22 #include <linux/seq_file.h>                        22 #include <linux/seq_file.h>
 23 #include <linux/debugfs.h>                         23 #include <linux/debugfs.h>
 24 #include <linux/sched.h>                           24 #include <linux/sched.h>
 25 #include <linux/math64.h>                          25 #include <linux/math64.h>
 26 #include <linux/writeback.h>                       26 #include <linux/writeback.h>
 27 #include <linux/compaction.h>                      27 #include <linux/compaction.h>
 28 #include <linux/mm_inline.h>                       28 #include <linux/mm_inline.h>
                                                   >>  29 #include <linux/page_ext.h>
 29 #include <linux/page_owner.h>                      30 #include <linux/page_owner.h>
 30 #include <linux/sched/isolation.h>             << 
 31                                                    31 
 32 #include "internal.h"                              32 #include "internal.h"
 33                                                    33 
                                                   >>  34 #define NUMA_STATS_THRESHOLD (U16_MAX - 2)
                                                   >>  35 
 34 #ifdef CONFIG_NUMA                                 36 #ifdef CONFIG_NUMA
 35 int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;        37 int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;
 36                                                    38 
 37 /* zero numa counters within a zone */             39 /* zero numa counters within a zone */
 38 static void zero_zone_numa_counters(struct zon     40 static void zero_zone_numa_counters(struct zone *zone)
 39 {                                                  41 {
 40         int item, cpu;                             42         int item, cpu;
 41                                                    43 
 42         for (item = 0; item < NR_VM_NUMA_EVENT !!  44         for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++) {
 43                 atomic_long_set(&zone->vm_numa !!  45                 atomic_long_set(&zone->vm_numa_stat[item], 0);
 44                 for_each_online_cpu(cpu) {     !!  46                 for_each_online_cpu(cpu)
 45                         per_cpu_ptr(zone->per_ !!  47                         per_cpu_ptr(zone->pageset, cpu)->vm_numa_stat_diff[item]
 46                                                    48                                                 = 0;
 47                 }                              << 
 48         }                                          49         }
 49 }                                                  50 }
 50                                                    51 
 51 /* zero numa counters of all the populated zon     52 /* zero numa counters of all the populated zones */
 52 static void zero_zones_numa_counters(void)         53 static void zero_zones_numa_counters(void)
 53 {                                                  54 {
 54         struct zone *zone;                         55         struct zone *zone;
 55                                                    56 
 56         for_each_populated_zone(zone)              57         for_each_populated_zone(zone)
 57                 zero_zone_numa_counters(zone);     58                 zero_zone_numa_counters(zone);
 58 }                                                  59 }
 59                                                    60 
 60 /* zero global numa counters */                    61 /* zero global numa counters */
 61 static void zero_global_numa_counters(void)        62 static void zero_global_numa_counters(void)
 62 {                                                  63 {
 63         int item;                                  64         int item;
 64                                                    65 
 65         for (item = 0; item < NR_VM_NUMA_EVENT !!  66         for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++)
 66                 atomic_long_set(&vm_numa_event !!  67                 atomic_long_set(&vm_numa_stat[item], 0);
 67 }                                                  68 }
 68                                                    69 
 69 static void invalid_numa_statistics(void)          70 static void invalid_numa_statistics(void)
 70 {                                                  71 {
 71         zero_zones_numa_counters();                72         zero_zones_numa_counters();
 72         zero_global_numa_counters();               73         zero_global_numa_counters();
 73 }                                                  74 }
 74                                                    75 
 75 static DEFINE_MUTEX(vm_numa_stat_lock);            76 static DEFINE_MUTEX(vm_numa_stat_lock);
 76                                                    77 
 77 int sysctl_vm_numa_stat_handler(const struct c !!  78 int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write,
 78                 void *buffer, size_t *length,      79                 void *buffer, size_t *length, loff_t *ppos)
 79 {                                                  80 {
 80         int ret, oldval;                           81         int ret, oldval;
 81                                                    82 
 82         mutex_lock(&vm_numa_stat_lock);            83         mutex_lock(&vm_numa_stat_lock);
 83         if (write)                                 84         if (write)
 84                 oldval = sysctl_vm_numa_stat;      85                 oldval = sysctl_vm_numa_stat;
 85         ret = proc_dointvec_minmax(table, writ     86         ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
 86         if (ret || !write)                         87         if (ret || !write)
 87                 goto out;                          88                 goto out;
 88                                                    89 
 89         if (oldval == sysctl_vm_numa_stat)         90         if (oldval == sysctl_vm_numa_stat)
 90                 goto out;                          91                 goto out;
 91         else if (sysctl_vm_numa_stat == ENABLE     92         else if (sysctl_vm_numa_stat == ENABLE_NUMA_STAT) {
 92                 static_branch_enable(&vm_numa_     93                 static_branch_enable(&vm_numa_stat_key);
 93                 pr_info("enable numa statistic     94                 pr_info("enable numa statistics\n");
 94         } else {                                   95         } else {
 95                 static_branch_disable(&vm_numa     96                 static_branch_disable(&vm_numa_stat_key);
 96                 invalid_numa_statistics();         97                 invalid_numa_statistics();
 97                 pr_info("disable numa statisti     98                 pr_info("disable numa statistics, and clear numa counters\n");
 98         }                                          99         }
 99                                                   100 
100 out:                                              101 out:
101         mutex_unlock(&vm_numa_stat_lock);         102         mutex_unlock(&vm_numa_stat_lock);
102         return ret;                               103         return ret;
103 }                                                 104 }
104 #endif                                            105 #endif
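
Note: the handler above backs the vm.numa_stat sysctl. Writing 0 (echo 0 > /proc/sys/vm/numa_stat) flips the static branch off and clears both the per-zone and the global NUMA counters, so re-enabling with 1 restarts counting from zero rather than resuming old totals. The left column also reflects the newer const struct ctl_table * handler signature.
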
105                                                   106 
106 #ifdef CONFIG_VM_EVENT_COUNTERS                   107 #ifdef CONFIG_VM_EVENT_COUNTERS
107 DEFINE_PER_CPU(struct vm_event_state, vm_event    108 DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
108 EXPORT_PER_CPU_SYMBOL(vm_event_states);           109 EXPORT_PER_CPU_SYMBOL(vm_event_states);
109                                                   110 
110 static void sum_vm_events(unsigned long *ret)     111 static void sum_vm_events(unsigned long *ret)
111 {                                                 112 {
112         int cpu;                                  113         int cpu;
113         int i;                                    114         int i;
114                                                   115 
115         memset(ret, 0, NR_VM_EVENT_ITEMS * siz    116         memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
116                                                   117 
117         for_each_online_cpu(cpu) {                118         for_each_online_cpu(cpu) {
118                 struct vm_event_state *this =     119                 struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
119                                                   120 
120                 for (i = 0; i < NR_VM_EVENT_IT    121                 for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
121                         ret[i] += this->event[    122                         ret[i] += this->event[i];
122         }                                         123         }
123 }                                                 124 }
124                                                   125 
125 /*                                                126 /*
126  * Accumulate the vm event counters across all    127  * Accumulate the vm event counters across all CPUs.
127  * The result is unavoidably approximate - it     128  * The result is unavoidably approximate - it can change
128  * during and after execution of this function    129  * during and after execution of this function.
129 */                                                130 */
130 void all_vm_events(unsigned long *ret)            131 void all_vm_events(unsigned long *ret)
131 {                                                 132 {
132         cpus_read_lock();                      !! 133         get_online_cpus();
133         sum_vm_events(ret);                       134         sum_vm_events(ret);
134         cpus_read_unlock();                    !! 135         put_online_cpus();
135 }                                                 136 }
136 EXPORT_SYMBOL_GPL(all_vm_events);                 137 EXPORT_SYMBOL_GPL(all_vm_events);
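
Note: the only change to all_vm_events() is the rename of the CPU-hotplug read lock, get_online_cpus()/put_online_cpus() (right) to cpus_read_lock()/cpus_read_unlock() (left). A minimal caller sketch; report_pgfaults() is a hypothetical helper, PGFAULT is a real vm_event_item, and a kernel context with <linux/vmstat.h> included is assumed:

	/* Snapshot all VM event counters; per the comment above, the
	 * totals are approximate because other CPUs keep counting. */
	static void report_pgfaults(void)
	{
		unsigned long events[NR_VM_EVENT_ITEMS];

		all_vm_events(events);	/* takes the hotplug read lock */
		pr_info("pgfault events so far: %lu\n", events[PGFAULT]);
	}
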
137                                                   138 
138 /*                                                139 /*
139  * Fold the foreign cpu events into our own.      140  * Fold the foreign cpu events into our own.
140  *                                                141  *
141  * This is adding to the events on one process    142  * This is adding to the events on one processor
142  * but keeps the global counts constant.          143  * but keeps the global counts constant.
143  */                                               144  */
144 void vm_events_fold_cpu(int cpu)                  145 void vm_events_fold_cpu(int cpu)
145 {                                                 146 {
146         struct vm_event_state *fold_state = &p    147         struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
147         int i;                                    148         int i;
148                                                   149 
149         for (i = 0; i < NR_VM_EVENT_ITEMS; i++    150         for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
150                 count_vm_events(i, fold_state-    151                 count_vm_events(i, fold_state->event[i]);
151                 fold_state->event[i] = 0;         152                 fold_state->event[i] = 0;
152         }                                         153         }
153 }                                                 154 }
154                                                   155 
155 #endif /* CONFIG_VM_EVENT_COUNTERS */             156 #endif /* CONFIG_VM_EVENT_COUNTERS */
156                                                   157 
157 /*                                                158 /*
158  * Manage combined zone based / global counter    159  * Manage combined zone based / global counters
159  *                                                160  *
160  * vm_stat contains the global counters           161  * vm_stat contains the global counters
161  */                                               162  */
162 atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITE    163 atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
                                                   >> 164 atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp;
163 atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITE    165 atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
164 atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_I << 
165 EXPORT_SYMBOL(vm_zone_stat);                      166 EXPORT_SYMBOL(vm_zone_stat);
                                                   >> 167 EXPORT_SYMBOL(vm_numa_stat);
166 EXPORT_SYMBOL(vm_node_stat);                      168 EXPORT_SYMBOL(vm_node_stat);
167                                                   169 
168 #ifdef CONFIG_NUMA                             << 
169 static void fold_vm_zone_numa_events(struct zo << 
170 {                                              << 
171         unsigned long zone_numa_events[NR_VM_N << 
172         int cpu;                               << 
173         enum numa_stat_item item;              << 
174                                                << 
175         for_each_online_cpu(cpu) {             << 
176                 struct per_cpu_zonestat *pzsta << 
177                                                << 
178                 pzstats = per_cpu_ptr(zone->pe << 
179                 for (item = 0; item < NR_VM_NU << 
180                         zone_numa_events[item] << 
181         }                                      << 
182                                                << 
183         for (item = 0; item < NR_VM_NUMA_EVENT << 
184                 zone_numa_event_add(zone_numa_ << 
185 }                                              << 
186                                                << 
187 void fold_vm_numa_events(void)                 << 
188 {                                              << 
189         struct zone *zone;                     << 
190                                                << 
191         for_each_populated_zone(zone)          << 
192                 fold_vm_zone_numa_events(zone) << 
193 }                                              << 
194 #endif                                         << 
195                                                << 
196 #ifdef CONFIG_SMP                                 170 #ifdef CONFIG_SMP
197                                                   171 
198 int calculate_pressure_threshold(struct zone *    172 int calculate_pressure_threshold(struct zone *zone)
199 {                                                 173 {
200         int threshold;                            174         int threshold;
201         int watermark_distance;                   175         int watermark_distance;
202                                                   176 
203         /*                                        177         /*
204          * As vmstats are not up to date, ther    178          * As vmstats are not up to date, there is drift between the estimated
205          * and real values. For high threshold    179          * and real values. For high thresholds and a high number of CPUs, it
206          * is possible for the min watermark t    180          * is possible for the min watermark to be breached while the estimated
207          * value looks fine. The pressure thre    181          * value looks fine. The pressure threshold is a reduced value such
208          * that even the maximum amount of dri    182          * that even the maximum amount of drift will not accidentally breach
209          * the min watermark                      183          * the min watermark
210          */                                       184          */
211         watermark_distance = low_wmark_pages(z    185         watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
212         threshold = max(1, (int)(watermark_dis    186         threshold = max(1, (int)(watermark_distance / num_online_cpus()));
213                                                   187 
214         /*                                        188         /*
215          * Maximum threshold is 125               189          * Maximum threshold is 125
216          */                                       190          */
217         threshold = min(125, threshold);          191         threshold = min(125, threshold);
218                                                   192 
219         return threshold;                         193         return threshold;
220 }                                                 194 }
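
Note: a worked instance with assumed numbers: for a zone with a low watermark of 1280 pages and a min watermark of 1024 pages on 8 online CPUs, watermark_distance = 1280 - 1024 = 256 and threshold = max(1, 256 / 8) = 32, well under the 125 cap. Worst-case drift is then num_online_cpus() * threshold = 8 * 32 = 256 pages, exactly the watermark gap, so stale per-cpu diffs alone cannot push the zone below the min watermark.
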
221                                                   195 
222 int calculate_normal_threshold(struct zone *zo    196 int calculate_normal_threshold(struct zone *zone)
223 {                                                 197 {
224         int threshold;                            198         int threshold;
225         int mem;        /* memory in 128 MB un    199         int mem;        /* memory in 128 MB units */
226                                                   200 
227         /*                                        201         /*
228          * The threshold scales with the numbe    202          * The threshold scales with the number of processors and the amount
229          * of memory per zone. More memory mea    203          * of memory per zone. More memory means that we can defer updates for
230          * longer, more processors could lead     204          * longer, more processors could lead to more contention.
231          * fls() is used to have a cheap way o    205          * fls() is used to have a cheap way of logarithmic scaling.
232          *                                        206          *
233          * Some sample thresholds:                207          * Some sample thresholds:
234          *                                        208          *
235          * Threshold    Processors      (fls)  !! 209          * Threshold    Processors      (fls)   Zonesize        fls(mem+1)
236          * -----------------------------------    210          * ------------------------------------------------------------------
237          * 8            1               1         211          * 8            1               1       0.9-1 GB        4
238          * 16           2               2         212          * 16           2               2       0.9-1 GB        4
239          * 20           2               2         213          * 20           2               2       1-2 GB          5
240          * 24           2               2         214          * 24           2               2       2-4 GB          6
241          * 28           2               2         215          * 28           2               2       4-8 GB          7
242          * 32           2               2         216          * 32           2               2       8-16 GB         8
243          * 4            2               2         217          * 4            2               2       <128M           1
244          * 30           4               3         218          * 30           4               3       2-4 GB          5
245          * 48           4               3         219          * 48           4               3       8-16 GB         8
246          * 32           8               4         220          * 32           8               4       1-2 GB          4
247          * 32           8               4         221          * 32           8               4       0.9-1GB         4
248          * 10           16              5         222          * 10           16              5       <128M           1
249          * 40           16              5         223          * 40           16              5       900M            4
250          * 70           64              7         224          * 70           64              7       2-4 GB          5
251          * 84           64              7         225          * 84           64              7       4-8 GB          6
252          * 108          512             9         226          * 108          512             9       4-8 GB          6
253          * 125          1024            10        227          * 125          1024            10      8-16 GB         8
254          * 125          1024            10        228          * 125          1024            10      16-32 GB        9
255          */                                       229          */
256                                                   230 
257         mem = zone_managed_pages(zone) >> (27     231         mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT);
258                                                   232 
259         threshold = 2 * fls(num_online_cpus())    233         threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));
260                                                   234 
261         /*                                        235         /*
262          * Maximum threshold is 125               236          * Maximum threshold is 125
263          */                                       237          */
264         threshold = min(125, threshold);          238         threshold = min(125, threshold);
265                                                   239 
266         return threshold;                         240         return threshold;
267 }                                                 241 }
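
Note: checking the formula against a self-consistent row of the table: for the '24 / 2 / 2 / 2-4 GB / 6' row, assume a 2 GB zone. mem = 2 GB >> 27 = 16 (128 MB units), fls(16) = 5 and fls(2) = 2, so threshold = 2 * 2 * (1 + 5) = 24. On large machines the product overshoots and is clamped, as in the 1024-CPU rows that read 125.
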
268                                                   242 
269 /*                                                243 /*
270  * Refresh the thresholds for each zone.          244  * Refresh the thresholds for each zone.
271  */                                               245  */
272 void refresh_zone_stat_thresholds(void)           246 void refresh_zone_stat_thresholds(void)
273 {                                                 247 {
274         struct pglist_data *pgdat;                248         struct pglist_data *pgdat;
275         struct zone *zone;                        249         struct zone *zone;
276         int cpu;                                  250         int cpu;
277         int threshold;                            251         int threshold;
278                                                   252 
279         /* Zero current pgdat thresholds */       253         /* Zero current pgdat thresholds */
280         for_each_online_pgdat(pgdat) {            254         for_each_online_pgdat(pgdat) {
281                 for_each_online_cpu(cpu) {        255                 for_each_online_cpu(cpu) {
282                         per_cpu_ptr(pgdat->per    256                         per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold = 0;
283                 }                                 257                 }
284         }                                         258         }
285                                                   259 
286         for_each_populated_zone(zone) {           260         for_each_populated_zone(zone) {
287                 struct pglist_data *pgdat = zo    261                 struct pglist_data *pgdat = zone->zone_pgdat;
288                 unsigned long max_drift, toler    262                 unsigned long max_drift, tolerate_drift;
289                                                   263 
290                 threshold = calculate_normal_t    264                 threshold = calculate_normal_threshold(zone);
291                                                   265 
292                 for_each_online_cpu(cpu) {        266                 for_each_online_cpu(cpu) {
293                         int pgdat_threshold;      267                         int pgdat_threshold;
294                                                   268 
295                         per_cpu_ptr(zone->per_ !! 269                         per_cpu_ptr(zone->pageset, cpu)->stat_threshold
296                                                   270                                                         = threshold;
297                                                   271 
298                         /* Base nodestat thres    272                         /* Base nodestat threshold on the largest populated zone. */
299                         pgdat_threshold = per_    273                         pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold;
300                         per_cpu_ptr(pgdat->per    274                         per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold
301                                 = max(threshol    275                                 = max(threshold, pgdat_threshold);
302                 }                                 276                 }
303                                                   277 
304                 /*                                278                 /*
305                  * Only set percpu_drift_mark     279                  * Only set percpu_drift_mark if there is a danger that
306                  * NR_FREE_PAGES reports the l    280                  * NR_FREE_PAGES reports the low watermark is ok when in fact
307                  * the min watermark could be     281                  * the min watermark could be breached by an allocation
308                  */                               282                  */
309                 tolerate_drift = low_wmark_pag    283                 tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
310                 max_drift = num_online_cpus()     284                 max_drift = num_online_cpus() * threshold;
311                 if (max_drift > tolerate_drift    285                 if (max_drift > tolerate_drift)
312                         zone->percpu_drift_mar    286                         zone->percpu_drift_mark = high_wmark_pages(zone) +
313                                         max_dr    287                                         max_drift;
314         }                                         288         }
315 }                                                 289 }
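
Note: putting numbers on the drift mark: at the 125 cap with 64 online CPUs, max_drift = 64 * 125 = 8000 pages (about 31 MB with 4 KB pages). When that exceeds the low-to-min watermark gap, percpu_drift_mark is set above the high watermark so that watermark checks near it can fall back to the exact but slower per-cpu summation (zone_page_state_snapshot()).
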
316                                                   290 
317 void set_pgdat_percpu_threshold(pg_data_t *pgd    291 void set_pgdat_percpu_threshold(pg_data_t *pgdat,
318                                 int (*calculat    292                                 int (*calculate_pressure)(struct zone *))
319 {                                                 293 {
320         struct zone *zone;                        294         struct zone *zone;
321         int cpu;                                  295         int cpu;
322         int threshold;                            296         int threshold;
323         int i;                                    297         int i;
324                                                   298 
325         for (i = 0; i < pgdat->nr_zones; i++)     299         for (i = 0; i < pgdat->nr_zones; i++) {
326                 zone = &pgdat->node_zones[i];     300                 zone = &pgdat->node_zones[i];
327                 if (!zone->percpu_drift_mark)     301                 if (!zone->percpu_drift_mark)
328                         continue;                 302                         continue;
329                                                   303 
330                 threshold = (*calculate_pressu    304                 threshold = (*calculate_pressure)(zone);
331                 for_each_online_cpu(cpu)          305                 for_each_online_cpu(cpu)
332                         per_cpu_ptr(zone->per_ !! 306                         per_cpu_ptr(zone->pageset, cpu)->stat_threshold
333                                                   307                                                         = threshold;
334         }                                         308         }
335 }                                                 309 }
336                                                   310 
337 /*                                                311 /*
338  * For use when we know that interrupts are di    312  * For use when we know that interrupts are disabled,
339  * or when we know that preemption is disabled    313  * or when we know that preemption is disabled and that
340  * particular counter cannot be updated from i    314  * particular counter cannot be updated from interrupt context.
341  */                                               315  */
342 void __mod_zone_page_state(struct zone *zone,     316 void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
343                            long delta)            317                            long delta)
344 {                                                 318 {
345         struct per_cpu_zonestat __percpu *pcp  !! 319         struct per_cpu_pageset __percpu *pcp = zone->pageset;
346         s8 __percpu *p = pcp->vm_stat_diff + i    320         s8 __percpu *p = pcp->vm_stat_diff + item;
347         long x;                                   321         long x;
348         long t;                                   322         long t;
349                                                   323 
350         /*                                     << 
351          * Accurate vmstat updates require a R << 
352          * atomicity is provided by IRQs being << 
353          * or via local_lock_irq. On PREEMPT_R << 
354          * CPU migrations and preemption poten << 
355          * disable preemption.                 << 
356          */                                    << 
357         preempt_disable_nested();              << 
358                                                << 
359         x = delta + __this_cpu_read(*p);          324         x = delta + __this_cpu_read(*p);
360                                                   325 
361         t = __this_cpu_read(pcp->stat_threshol    326         t = __this_cpu_read(pcp->stat_threshold);
362                                                   327 
363         if (unlikely(abs(x) > t)) {            !! 328         if (unlikely(x > t || x < -t)) {
364                 zone_page_state_add(x, zone, i    329                 zone_page_state_add(x, zone, item);
365                 x = 0;                            330                 x = 0;
366         }                                         331         }
367         __this_cpu_write(*p, x);                  332         __this_cpu_write(*p, x);
368                                                << 
369         preempt_enable_nested();               << 
370 }                                                 333 }
371 EXPORT_SYMBOL(__mod_zone_page_state);             334 EXPORT_SYMBOL(__mod_zone_page_state);
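
Note: a sketch of the typical call-site shape for the double-underscore variant, per the locking comment above (illustrative only, not a quote from the allocator; the lock and `order` are assumptions):

	unsigned long flags;

	/* The caller already runs with IRQs off -- here via
	 * spin_lock_irqsave() on zone->lock -- so the non-atomic
	 * __mod variant is safe. */
	spin_lock_irqsave(&zone->lock, flags);
	__mod_zone_page_state(zone, NR_FREE_PAGES, -(1L << order));
	spin_unlock_irqrestore(&zone->lock, flags);
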
372                                                   335 
373 void __mod_node_page_state(struct pglist_data     336 void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
374                                 long delta)       337                                 long delta)
375 {                                                 338 {
376         struct per_cpu_nodestat __percpu *pcp     339         struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
377         s8 __percpu *p = pcp->vm_node_stat_dif    340         s8 __percpu *p = pcp->vm_node_stat_diff + item;
378         long x;                                   341         long x;
379         long t;                                   342         long t;
380                                                   343 
381         if (vmstat_item_in_bytes(item)) {      << 
382                 /*                             << 
383                  * Only cgroups use subpage ac << 
384                  * the global level, these ite << 
385                  * multiples of whole pages. S << 
386                  * internally to keep the per- << 
387                  */                            << 
388                 VM_WARN_ON_ONCE(delta & (PAGE_ << 
389                 delta >>= PAGE_SHIFT;          << 
390         }                                      << 
391                                                << 
392         /* See __mod_node_page_state */        << 
393         preempt_disable_nested();              << 
394                                                << 
395         x = delta + __this_cpu_read(*p);          344         x = delta + __this_cpu_read(*p);
396                                                   345 
397         t = __this_cpu_read(pcp->stat_threshol    346         t = __this_cpu_read(pcp->stat_threshold);
398                                                   347 
399         if (unlikely(abs(x) > t)) {            !! 348         if (unlikely(x > t || x < -t)) {
400                 node_page_state_add(x, pgdat,     349                 node_page_state_add(x, pgdat, item);
401                 x = 0;                            350                 x = 0;
402         }                                         351         }
403         __this_cpu_write(*p, x);                  352         __this_cpu_write(*p, x);
404                                                << 
405         preempt_enable_nested();               << 
406 }                                                 353 }
407 EXPORT_SYMBOL(__mod_node_page_state);             354 EXPORT_SYMBOL(__mod_node_page_state);
408                                                   355 
409 /*                                                356 /*
410  * Optimized increment and decrement functions    357  * Optimized increment and decrement functions.
411  *                                                358  *
412  * These are only for a single page and theref    359  * These are only for a single page and therefore can take a struct page *
413  * argument instead of struct zone *. This all    360  * argument instead of struct zone *. This allows the inclusion of the code
414  * generated for page_zone(page) into the opti    361  * generated for page_zone(page) into the optimized functions.
415  *                                                362  *
416  * No overflow check is necessary and therefor    363  * No overflow check is necessary and therefore the differential can be
417  * incremented or decremented in place which m    364  * incremented or decremented in place which may allow the compilers to
418  * generate better code.                          365  * generate better code.
419  * The increment or decrement is known and the    366  * The increment or decrement is known and therefore one boundary check can
420  * be omitted.                                    367  * be omitted.
421  *                                                368  *
422  * NOTE: These functions are very performance     369  * NOTE: These functions are very performance sensitive. Change only
423  * with care.                                     370  * with care.
424  *                                                371  *
425  * Some processors have inc/dec instructions t    372  * Some processors have inc/dec instructions that are atomic vs an interrupt.
426  * However, the code must first determine the     373  * However, the code must first determine the differential location in a zone
427  * based on the processor number and then inc/    374  * based on the processor number and then inc/dec the counter. There is no
428  * guarantee without disabling preemption that    375  * guarantee without disabling preemption that the processor will not change
429  * in between and therefore the atomicity vs.     376  * in between and therefore the atomicity vs. interrupt cannot be exploited
430  * in a useful way here.                          377  * in a useful way here.
431  */                                               378  */
432 void __inc_zone_state(struct zone *zone, enum     379 void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
433 {                                                 380 {
434         struct per_cpu_zonestat __percpu *pcp  !! 381         struct per_cpu_pageset __percpu *pcp = zone->pageset;
435         s8 __percpu *p = pcp->vm_stat_diff + i    382         s8 __percpu *p = pcp->vm_stat_diff + item;
436         s8 v, t;                                  383         s8 v, t;
437                                                   384 
438         /* See __mod_node_page_state */        << 
439         preempt_disable_nested();              << 
440                                                << 
441         v = __this_cpu_inc_return(*p);            385         v = __this_cpu_inc_return(*p);
442         t = __this_cpu_read(pcp->stat_threshol    386         t = __this_cpu_read(pcp->stat_threshold);
443         if (unlikely(v > t)) {                    387         if (unlikely(v > t)) {
444                 s8 overstep = t >> 1;             388                 s8 overstep = t >> 1;
445                                                   389 
446                 zone_page_state_add(v + overst    390                 zone_page_state_add(v + overstep, zone, item);
447                 __this_cpu_write(*p, -overstep    391                 __this_cpu_write(*p, -overstep);
448         }                                         392         }
449                                                << 
450         preempt_enable_nested();               << 
451 }                                                 393 }
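
Note: the overstep spaces out folds for counters that move mostly in one direction. Assume stat_threshold t = 32: when the per-cpu diff first reaches v = 33, the code adds v + (t >> 1) = 33 + 16 = 49 to the zone counter and writes -16 back, so the next fold for a steadily increasing counter is roughly 49 increments away instead of 33, at the cost of a slightly larger transient overshoot in the global count.
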
452                                                   394 
453 void __inc_node_state(struct pglist_data *pgda    395 void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
454 {                                                 396 {
455         struct per_cpu_nodestat __percpu *pcp     397         struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
456         s8 __percpu *p = pcp->vm_node_stat_dif    398         s8 __percpu *p = pcp->vm_node_stat_diff + item;
457         s8 v, t;                                  399         s8 v, t;
458                                                   400 
459         VM_WARN_ON_ONCE(vmstat_item_in_bytes(i << 
460                                                << 
461         /* See __mod_node_page_state */        << 
462         preempt_disable_nested();              << 
463                                                << 
464         v = __this_cpu_inc_return(*p);            401         v = __this_cpu_inc_return(*p);
465         t = __this_cpu_read(pcp->stat_threshol    402         t = __this_cpu_read(pcp->stat_threshold);
466         if (unlikely(v > t)) {                    403         if (unlikely(v > t)) {
467                 s8 overstep = t >> 1;             404                 s8 overstep = t >> 1;
468                                                   405 
469                 node_page_state_add(v + overst    406                 node_page_state_add(v + overstep, pgdat, item);
470                 __this_cpu_write(*p, -overstep    407                 __this_cpu_write(*p, -overstep);
471         }                                         408         }
472                                                << 
473         preempt_enable_nested();               << 
474 }                                                 409 }
475                                                   410 
476 void __inc_zone_page_state(struct page *page,     411 void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
477 {                                                 412 {
478         __inc_zone_state(page_zone(page), item    413         __inc_zone_state(page_zone(page), item);
479 }                                                 414 }
480 EXPORT_SYMBOL(__inc_zone_page_state);             415 EXPORT_SYMBOL(__inc_zone_page_state);
481                                                   416 
482 void __inc_node_page_state(struct page *page,     417 void __inc_node_page_state(struct page *page, enum node_stat_item item)
483 {                                                 418 {
484         __inc_node_state(page_pgdat(page), ite    419         __inc_node_state(page_pgdat(page), item);
485 }                                                 420 }
486 EXPORT_SYMBOL(__inc_node_page_state);             421 EXPORT_SYMBOL(__inc_node_page_state);
487                                                   422 
488 void __dec_zone_state(struct zone *zone, enum     423 void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
489 {                                                 424 {
490         struct per_cpu_zonestat __percpu *pcp  !! 425         struct per_cpu_pageset __percpu *pcp = zone->pageset;
491         s8 __percpu *p = pcp->vm_stat_diff + i    426         s8 __percpu *p = pcp->vm_stat_diff + item;
492         s8 v, t;                                  427         s8 v, t;
493                                                   428 
494         /* See __mod_node_page_state */        << 
495         preempt_disable_nested();              << 
496                                                << 
497         v = __this_cpu_dec_return(*p);            429         v = __this_cpu_dec_return(*p);
498         t = __this_cpu_read(pcp->stat_threshol    430         t = __this_cpu_read(pcp->stat_threshold);
499         if (unlikely(v < - t)) {                  431         if (unlikely(v < - t)) {
500                 s8 overstep = t >> 1;             432                 s8 overstep = t >> 1;
501                                                   433 
502                 zone_page_state_add(v - overst    434                 zone_page_state_add(v - overstep, zone, item);
503                 __this_cpu_write(*p, overstep)    435                 __this_cpu_write(*p, overstep);
504         }                                         436         }
505                                                << 
506         preempt_enable_nested();               << 
507 }                                                 437 }
508                                                   438 
509 void __dec_node_state(struct pglist_data *pgda    439 void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
510 {                                                 440 {
511         struct per_cpu_nodestat __percpu *pcp     441         struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
512         s8 __percpu *p = pcp->vm_node_stat_dif    442         s8 __percpu *p = pcp->vm_node_stat_diff + item;
513         s8 v, t;                                  443         s8 v, t;
514                                                   444 
515         VM_WARN_ON_ONCE(vmstat_item_in_bytes(i << 
516                                                << 
517         /* See __mod_node_page_state */        << 
518         preempt_disable_nested();              << 
519                                                << 
520         v = __this_cpu_dec_return(*p);            445         v = __this_cpu_dec_return(*p);
521         t = __this_cpu_read(pcp->stat_threshol    446         t = __this_cpu_read(pcp->stat_threshold);
522         if (unlikely(v < - t)) {                  447         if (unlikely(v < - t)) {
523                 s8 overstep = t >> 1;             448                 s8 overstep = t >> 1;
524                                                   449 
525                 node_page_state_add(v - overst    450                 node_page_state_add(v - overstep, pgdat, item);
526                 __this_cpu_write(*p, overstep)    451                 __this_cpu_write(*p, overstep);
527         }                                         452         }
528                                                << 
529         preempt_enable_nested();               << 
530 }                                                 453 }
531                                                   454 
532 void __dec_zone_page_state(struct page *page,     455 void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
533 {                                                 456 {
534         __dec_zone_state(page_zone(page), item    457         __dec_zone_state(page_zone(page), item);
535 }                                                 458 }
536 EXPORT_SYMBOL(__dec_zone_page_state);             459 EXPORT_SYMBOL(__dec_zone_page_state);
537                                                   460 
538 void __dec_node_page_state(struct page *page,     461 void __dec_node_page_state(struct page *page, enum node_stat_item item)
539 {                                                 462 {
540         __dec_node_state(page_pgdat(page), ite    463         __dec_node_state(page_pgdat(page), item);
541 }                                                 464 }
542 EXPORT_SYMBOL(__dec_node_page_state);             465 EXPORT_SYMBOL(__dec_node_page_state);
543                                                   466 
544 #ifdef CONFIG_HAVE_CMPXCHG_LOCAL                  467 #ifdef CONFIG_HAVE_CMPXCHG_LOCAL
545 /*                                                468 /*
546  * If we have cmpxchg_local support then we do    469  * If we have cmpxchg_local support then we do not need to incur the overhead
547  * that comes with local_irq_save/restore if w    470  * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
548  *                                                471  *
549  * mod_state() modifies the zone counter state    472  * mod_state() modifies the zone counter state through atomic per cpu
550  * operations.                                    473  * operations.
551  *                                                474  *
 552  * Overstep mode specifies how overstep should    475  * Overstep mode specifies how overstep should be handled:
553  *     0       No overstepping                    476  *     0       No overstepping
554  *     1       Overstepping half of threshold     477  *     1       Overstepping half of threshold
555  *     -1      Overstepping minus half of thre    478  *     -1      Overstepping minus half of threshold
556 */                                                479 */
557 static inline void mod_zone_state(struct zone     480 static inline void mod_zone_state(struct zone *zone,
558        enum zone_stat_item item, long delta, i    481        enum zone_stat_item item, long delta, int overstep_mode)
559 {                                                 482 {
560         struct per_cpu_zonestat __percpu *pcp  !! 483         struct per_cpu_pageset __percpu *pcp = zone->pageset;
561         s8 __percpu *p = pcp->vm_stat_diff + i    484         s8 __percpu *p = pcp->vm_stat_diff + item;
562         long n, t, z;                          !! 485         long o, n, t, z;
563         s8 o;                                  << 
564                                                   486 
565         o = this_cpu_read(*p);                 << 
566         do {                                      487         do {
567                 z = 0;  /* overflow to zone co    488                 z = 0;  /* overflow to zone counters */
568                                                   489 
569                 /*                                490                 /*
570                  * The fetching of the stat_th    491                  * The fetching of the stat_threshold is racy. We may apply
 571                  * a counter threshold to the     492                  * a counter threshold to the wrong cpu if we get
572                  * rescheduled while executing    493                  * rescheduled while executing here. However, the next
573                  * counter update will apply t    494                  * counter update will apply the threshold again and
574                  * therefore bring the counter    495                  * therefore bring the counter under the threshold again.
575                  *                                496                  *
 576                  * Most of the time the thresh    497                  * Most of the time the thresholds are the same anyway
577                  * for all cpus in a zone.        498                  * for all cpus in a zone.
578                  */                               499                  */
579                 t = this_cpu_read(pcp->stat_th    500                 t = this_cpu_read(pcp->stat_threshold);
580                                                   501 
581                 n = delta + (long)o;           !! 502                 o = this_cpu_read(*p);
                                                   >> 503                 n = delta + o;
582                                                   504 
583                 if (abs(n) > t) {              !! 505                 if (n > t || n < -t) {
584                         int os = overstep_mode    506                         int os = overstep_mode * (t >> 1) ;
585                                                   507 
586                         /* Overflow must be ad    508                         /* Overflow must be added to zone counters */
587                         z = n + os;               509                         z = n + os;
588                         n = -os;                  510                         n = -os;
589                 }                                 511                 }
590         } while (!this_cpu_try_cmpxchg(*p, &o, !! 512         } while (this_cpu_cmpxchg(*p, o, n) != o);
591                                                   513 
592         if (z)                                    514         if (z)
593                 zone_page_state_add(z, zone, i    515                 zone_page_state_add(z, zone, item);
594 }                                                 516 }
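
Note: the right column retries while this_cpu_cmpxchg() returns a value other than the freshly re-read o; the left column reads o once and lets this_cpu_try_cmpxchg() update it in place on failure. The same retry pattern in portable C11 atomics, as a userspace analogue of overstep mode 0 (mod_counter and global_counter are made-up names, not kernel API):

	#include <stdatomic.h>
	#include <stdlib.h>

	static _Atomic long global_counter;

	/* Fold delta into a per-thread diff; once the diff would cross the
	 * threshold, spill the whole amount to the global counter. On CAS
	 * failure atomic_compare_exchange_weak() reloads o, mirroring the
	 * contract of this_cpu_try_cmpxchg(). */
	static void mod_counter(_Atomic long *p, long delta, long threshold)
	{
		long o = atomic_load_explicit(p, memory_order_relaxed);
		long n, z;

		do {
			z = 0;			/* amount to spill globally */
			n = delta + o;
			if (labs(n) > threshold) {
				z = n;		/* overflow to the global counter */
				n = 0;
			}
		} while (!atomic_compare_exchange_weak(p, &o, n));

		if (z)
			atomic_fetch_add(&global_counter, z);
	}
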
595                                                   517 
596 void mod_zone_page_state(struct zone *zone, en    518 void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
597                          long delta)              519                          long delta)
598 {                                                 520 {
599         mod_zone_state(zone, item, delta, 0);     521         mod_zone_state(zone, item, delta, 0);
600 }                                                 522 }
601 EXPORT_SYMBOL(mod_zone_page_state);               523 EXPORT_SYMBOL(mod_zone_page_state);
602                                                   524 
603 void inc_zone_page_state(struct page *page, en    525 void inc_zone_page_state(struct page *page, enum zone_stat_item item)
604 {                                                 526 {
605         mod_zone_state(page_zone(page), item,     527         mod_zone_state(page_zone(page), item, 1, 1);
606 }                                                 528 }
607 EXPORT_SYMBOL(inc_zone_page_state);               529 EXPORT_SYMBOL(inc_zone_page_state);
608                                                   530 
609 void dec_zone_page_state(struct page *page, en    531 void dec_zone_page_state(struct page *page, enum zone_stat_item item)
610 {                                                 532 {
611         mod_zone_state(page_zone(page), item,     533         mod_zone_state(page_zone(page), item, -1, -1);
612 }                                                 534 }
613 EXPORT_SYMBOL(dec_zone_page_state);               535 EXPORT_SYMBOL(dec_zone_page_state);
614                                                   536 
615 static inline void mod_node_state(struct pglis    537 static inline void mod_node_state(struct pglist_data *pgdat,
616        enum node_stat_item item, int delta, in    538        enum node_stat_item item, int delta, int overstep_mode)
617 {                                                 539 {
618         struct per_cpu_nodestat __percpu *pcp     540         struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
619         s8 __percpu *p = pcp->vm_node_stat_dif    541         s8 __percpu *p = pcp->vm_node_stat_diff + item;
620         long n, t, z;                          !! 542         long o, n, t, z;
621         s8 o;                                  << 
622                                                << 
623         if (vmstat_item_in_bytes(item)) {      << 
624                 /*                             << 
625                  * Only cgroups use subpage ac << 
626                  * the global level, these ite << 
627                  * multiples of whole pages. S << 
628                  * internally to keep the per- << 
629                  */                            << 
630                 VM_WARN_ON_ONCE(delta & (PAGE_ << 
631                 delta >>= PAGE_SHIFT;          << 
632         }                                      << 
633                                                   543 
634         o = this_cpu_read(*p);                 << 
635         do {                                      544         do {
636                 z = 0;  /* overflow to node co    545                 z = 0;  /* overflow to node counters */
637                                                   546 
638                 /*                                547                 /*
639                  * The fetching of the stat_th    548                  * The fetching of the stat_threshold is racy. We may apply
 640                  * a counter threshold to the     549                  * a counter threshold to the wrong cpu if we get
641                  * rescheduled while executing    550                  * rescheduled while executing here. However, the next
642                  * counter update will apply t    551                  * counter update will apply the threshold again and
643                  * therefore bring the counter    552                  * therefore bring the counter under the threshold again.
644                  *                                553                  *
645                  * Most of the time the thresh    554                  * Most of the time the thresholds are the same anyway
646                  * for all cpus in a node.        555                  * for all cpus in a node.
647                  */                               556                  */
648                 t = this_cpu_read(pcp->stat_th    557                 t = this_cpu_read(pcp->stat_threshold);
649                                                   558 
650                 n = delta + (long)o;           !! 559                 o = this_cpu_read(*p);
                                                   >> 560                 n = delta + o;
651                                                   561 
652                 if (abs(n) > t) {              !! 562                 if (n > t || n < -t) {
653                         int os = overstep_mode    563                         int os = overstep_mode * (t >> 1) ;
654                                                   564 
655                         /* Overflow must be ad    565                         /* Overflow must be added to node counters */
656                         z = n + os;               566                         z = n + os;
657                         n = -os;                  567                         n = -os;
658                 }                                 568                 }
659         } while (!this_cpu_try_cmpxchg(*p, &o, n));  !! 569         } while (this_cpu_cmpxchg(*p, o, n) != o);
660                                                   570 
661         if (z)                                    571         if (z)
662                 node_page_state_add(z, pgdat,     572                 node_page_state_add(z, pgdat, item);
663 }                                                 573 }
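
The threshold/overstep arithmetic above is easiest to see with concrete numbers. Here is a stand-alone user-space sketch of the same fold-on-overflow behaviour, assuming a stat_threshold of 32; node_counter, percpu_diff and mod_state are illustrative stand-ins for pgdat->vm_stat[item], the per-cpu delta and mod_node_state(), not kernel API, and the cmpxchg retry loop is omitted because the sketch is single-threaded:

#include <stdio.h>
#include <stdlib.h>

static long node_counter;		/* stands in for pgdat->vm_stat[item] */
static long percpu_diff;		/* stands in for *p, the per-cpu delta */

static void mod_state(long delta, int overstep_mode)
{
	long t = 32;			/* assumed stat_threshold */
	long n = percpu_diff + delta;

	if (labs(n) > t) {
		long os = overstep_mode * (t >> 1);

		node_counter += n + os;	/* fold overflow plus overstep */
		n = -os;		/* leave half a threshold of headroom */
	}
	percpu_diff = n;
}

int main(void)
{
	int i;

	for (i = 0; i < 40; i++)
		mod_state(1, 1);	/* 40 increments */
	/* prints node=49 diff=-9 total=40: the folded sum stays exact */
	printf("node=%ld diff=%ld total=%ld\n",
	       node_counter, percpu_diff, node_counter + percpu_diff);
	return 0;
}

The overstep biases the per-cpu delta half a threshold away from the edge it just crossed, so a counter moving steadily in one direction folds into the node total only about once every 49 updates instead of every 33.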
664                                                   574 
665 void mod_node_page_state(struct pglist_data *p    575 void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
666                                         long d    576                                         long delta)
667 {                                                 577 {
668         mod_node_state(pgdat, item, delta, 0);    578         mod_node_state(pgdat, item, delta, 0);
669 }                                                 579 }
670 EXPORT_SYMBOL(mod_node_page_state);               580 EXPORT_SYMBOL(mod_node_page_state);
671                                                   581 
672 void inc_node_state(struct pglist_data *pgdat,    582 void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
673 {                                                 583 {
674         mod_node_state(pgdat, item, 1, 1);        584         mod_node_state(pgdat, item, 1, 1);
675 }                                                 585 }
676                                                   586 
677 void inc_node_page_state(struct page *page, en    587 void inc_node_page_state(struct page *page, enum node_stat_item item)
678 {                                                 588 {
679         mod_node_state(page_pgdat(page), item,    589         mod_node_state(page_pgdat(page), item, 1, 1);
680 }                                                 590 }
681 EXPORT_SYMBOL(inc_node_page_state);               591 EXPORT_SYMBOL(inc_node_page_state);
682                                                   592 
683 void dec_node_page_state(struct page *page, en    593 void dec_node_page_state(struct page *page, enum node_stat_item item)
684 {                                                 594 {
685         mod_node_state(page_pgdat(page), item,    595         mod_node_state(page_pgdat(page), item, -1, -1);
686 }                                                 596 }
687 EXPORT_SYMBOL(dec_node_page_state);               597 EXPORT_SYMBOL(dec_node_page_state);
688 #else                                             598 #else
689 /*                                                599 /*
690  * Use interrupt disable to serialize counter     600  * Use interrupt disable to serialize counter updates
691  */                                               601  */
692 void mod_zone_page_state(struct zone *zone, en    602 void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
693                          long delta)              603                          long delta)
694 {                                                 604 {
695         unsigned long flags;                      605         unsigned long flags;
696                                                   606 
697         local_irq_save(flags);                    607         local_irq_save(flags);
698         __mod_zone_page_state(zone, item, delt    608         __mod_zone_page_state(zone, item, delta);
699         local_irq_restore(flags);                 609         local_irq_restore(flags);
700 }                                                 610 }
701 EXPORT_SYMBOL(mod_zone_page_state);               611 EXPORT_SYMBOL(mod_zone_page_state);
702                                                   612 
703 void inc_zone_page_state(struct page *page, en    613 void inc_zone_page_state(struct page *page, enum zone_stat_item item)
704 {                                                 614 {
705         unsigned long flags;                      615         unsigned long flags;
706         struct zone *zone;                        616         struct zone *zone;
707                                                   617 
708         zone = page_zone(page);                   618         zone = page_zone(page);
709         local_irq_save(flags);                    619         local_irq_save(flags);
710         __inc_zone_state(zone, item);             620         __inc_zone_state(zone, item);
711         local_irq_restore(flags);                 621         local_irq_restore(flags);
712 }                                                 622 }
713 EXPORT_SYMBOL(inc_zone_page_state);               623 EXPORT_SYMBOL(inc_zone_page_state);
714                                                   624 
715 void dec_zone_page_state(struct page *page, en    625 void dec_zone_page_state(struct page *page, enum zone_stat_item item)
716 {                                                 626 {
717         unsigned long flags;                      627         unsigned long flags;
718                                                   628 
719         local_irq_save(flags);                    629         local_irq_save(flags);
720         __dec_zone_page_state(page, item);        630         __dec_zone_page_state(page, item);
721         local_irq_restore(flags);                 631         local_irq_restore(flags);
722 }                                                 632 }
723 EXPORT_SYMBOL(dec_zone_page_state);               633 EXPORT_SYMBOL(dec_zone_page_state);
724                                                   634 
725 void inc_node_state(struct pglist_data *pgdat,    635 void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
726 {                                                 636 {
727         unsigned long flags;                      637         unsigned long flags;
728                                                   638 
729         local_irq_save(flags);                    639         local_irq_save(flags);
730         __inc_node_state(pgdat, item);            640         __inc_node_state(pgdat, item);
731         local_irq_restore(flags);                 641         local_irq_restore(flags);
732 }                                                 642 }
733 EXPORT_SYMBOL(inc_node_state);                    643 EXPORT_SYMBOL(inc_node_state);
734                                                   644 
735 void mod_node_page_state(struct pglist_data *p    645 void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
736                                         long d    646                                         long delta)
737 {                                                 647 {
738         unsigned long flags;                      648         unsigned long flags;
739                                                   649 
740         local_irq_save(flags);                    650         local_irq_save(flags);
741         __mod_node_page_state(pgdat, item, del    651         __mod_node_page_state(pgdat, item, delta);
742         local_irq_restore(flags);                 652         local_irq_restore(flags);
743 }                                                 653 }
744 EXPORT_SYMBOL(mod_node_page_state);               654 EXPORT_SYMBOL(mod_node_page_state);
745                                                   655 
746 void inc_node_page_state(struct page *page, en    656 void inc_node_page_state(struct page *page, enum node_stat_item item)
747 {                                                 657 {
748         unsigned long flags;                      658         unsigned long flags;
749         struct pglist_data *pgdat;                659         struct pglist_data *pgdat;
750                                                   660 
751         pgdat = page_pgdat(page);                 661         pgdat = page_pgdat(page);
752         local_irq_save(flags);                    662         local_irq_save(flags);
753         __inc_node_state(pgdat, item);            663         __inc_node_state(pgdat, item);
754         local_irq_restore(flags);                 664         local_irq_restore(flags);
755 }                                                 665 }
756 EXPORT_SYMBOL(inc_node_page_state);               666 EXPORT_SYMBOL(inc_node_page_state);
757                                                   667 
758 void dec_node_page_state(struct page *page, en    668 void dec_node_page_state(struct page *page, enum node_stat_item item)
759 {                                                 669 {
760         unsigned long flags;                      670         unsigned long flags;
761                                                   671 
762         local_irq_save(flags);                    672         local_irq_save(flags);
763         __dec_node_page_state(page, item);        673         __dec_node_page_state(page, item);
764         local_irq_restore(flags);                 674         local_irq_restore(flags);
765 }                                                 675 }
766 EXPORT_SYMBOL(dec_node_page_state);               676 EXPORT_SYMBOL(dec_node_page_state);
767 #endif                                            677 #endif
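
Which half of this #ifdef pair is built is decided by a guard earlier in the file (CONFIG_HAVE_CMPXCHG_LOCAL in mainline): architectures with a cheap CPU-local cmpxchg get the lock-free try_cmpxchg loops above, while this fallback branch simply brackets the non-atomic __-prefixed helpers with local_irq_save()/local_irq_restore(), trading a little interrupt latency for portability.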
768                                                   678 
769 /*                                                679 /*
770  * Fold a differential into the global counter    680  * Fold a differential into the global counters.
771  * Returns the number of counters updated.        681  * Returns the number of counters updated.
772  */                                               682  */
                                                   >> 683 #ifdef CONFIG_NUMA
                                                   >> 684 static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
                                                   >> 685 {
                                                   >> 686         int i;
                                                   >> 687         int changes = 0;
                                                   >> 688 
                                                   >> 689         for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
                                                   >> 690                 if (zone_diff[i]) {
                                                   >> 691                         atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
                                                   >> 692                         changes++;
                                                   >> 693         }
                                                   >> 694 
                                                   >> 695         for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
                                                   >> 696                 if (numa_diff[i]) {
                                                   >> 697                         atomic_long_add(numa_diff[i], &vm_numa_stat[i]);
                                                   >> 698                         changes++;
                                                   >> 699         }
                                                   >> 700 
                                                   >> 701         for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
                                                   >> 702                 if (node_diff[i]) {
                                                   >> 703                         atomic_long_add(node_diff[i], &vm_node_stat[i]);
                                                   >> 704                         changes++;
                                                   >> 705         }
                                                   >> 706         return changes;
                                                   >> 707 }
                                                   >> 708 #else
773 static int fold_diff(int *zone_diff, int *node    709 static int fold_diff(int *zone_diff, int *node_diff)
774 {                                                 710 {
775         int i;                                    711         int i;
776         int changes = 0;                          712         int changes = 0;
777                                                   713 
778         for (i = 0; i < NR_VM_ZONE_STAT_ITEMS;    714         for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
779                 if (zone_diff[i]) {               715                 if (zone_diff[i]) {
780                         atomic_long_add(zone_d    716                         atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
781                         changes++;                717                         changes++;
782         }                                         718         }
783                                                   719 
784         for (i = 0; i < NR_VM_NODE_STAT_ITEMS;    720         for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
785                 if (node_diff[i]) {               721                 if (node_diff[i]) {
786                         atomic_long_add(node_d    722                         atomic_long_add(node_diff[i], &vm_node_stat[i]);
787                         changes++;                723                         changes++;
788         }                                         724         }
789         return changes;                           725         return changes;
790 }                                                 726 }
                                                   >> 727 #endif /* CONFIG_NUMA */
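
As a concrete illustration with made-up values: folding a zone_diff of {2, 0, -5, 0, ...} together with a node_diff of {0, 7, 0, ...} adds 2 to vm_zone_stat[0], -5 to vm_zone_stat[2] and 7 to vm_node_stat[1], and returns 3. The callers care about how many counters changed, not by how much, so the count of touched counters is what comes back.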
791                                                   728 
792 /*                                                729 /*
793  * Update the zone counters for the current cp    730  * Update the zone counters for the current cpu.
794  *                                                731  *
795  * Note that refresh_cpu_vm_stats strives to o    732  * Note that refresh_cpu_vm_stats strives to only access
796  * node local memory. The per cpu pagesets on     733  * node local memory. The per cpu pagesets on remote zones are placed
797  * in the memory local to the processor using     734  * in the memory local to the processor using that pageset. So the
798  * loop over all zones will access a series of    735  * loop over all zones will access a series of cachelines local to
799  * the processor.                                 736  * the processor.
800  *                                                737  *
801  * The call to zone_page_state_add updates the    738  * The call to zone_page_state_add updates the cachelines with the
802  * statistics in the remote zone struct as wel    739  * statistics in the remote zone struct as well as the global cachelines
803  * with the global counters. These could cause    740  * with the global counters. These could cause remote node cache line
804  * bouncing and should only be done when neces    741  * bouncing and should only be done when necessary.
805  *                                                742  *
806  * The function returns the number of global c    743  * The function returns the number of global counters updated.
807  */                                               744  */
808 static int refresh_cpu_vm_stats(bool do_pagese    745 static int refresh_cpu_vm_stats(bool do_pagesets)
809 {                                                 746 {
810         struct pglist_data *pgdat;                747         struct pglist_data *pgdat;
811         struct zone *zone;                        748         struct zone *zone;
812         int i;                                    749         int i;
813         int global_zone_diff[NR_VM_ZONE_STAT_I    750         int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
                                                   >> 751 #ifdef CONFIG_NUMA
                                                   >> 752         int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
                                                   >> 753 #endif
814         int global_node_diff[NR_VM_NODE_STAT_I    754         int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
815         int changes = 0;                          755         int changes = 0;
816                                                   756 
817         for_each_populated_zone(zone) {           757         for_each_populated_zone(zone) {
818                 struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats;  !! 758                 struct per_cpu_pageset __percpu *p = zone->pageset;
819                 struct per_cpu_pages __percpu *pcp = zone->per_cpu_pageset;  <<
820                                                   759 
821                 for (i = 0; i < NR_VM_ZONE_STA    760                 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
822                         int v;                    761                         int v;
823                                                   762 
824                         v = this_cpu_xchg(pzstats->vm_stat_diff[i], 0);  !! 763                         v = this_cpu_xchg(p->vm_stat_diff[i], 0);
825                         if (v) {                  764                         if (v) {
826                                                   765 
827                                 atomic_long_ad    766                                 atomic_long_add(v, &zone->vm_stat[i]);
828                                 global_zone_di    767                                 global_zone_diff[i] += v;
829 #ifdef CONFIG_NUMA                                768 #ifdef CONFIG_NUMA
830                                 /* 3 seconds i    769                                 /* 3 seconds idle till flush */
831                                 __this_cpu_write(pcp->expire, 3);  !! 770                                 __this_cpu_write(p->expire, 3);
832 #endif                                            771 #endif
833                         }                         772                         }
834                 }                                 773                 }
                                                   >> 774 #ifdef CONFIG_NUMA
                                                   >> 775                 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
                                                   >> 776                         int v;
                                                   >> 777 
                                                   >> 778                         v = this_cpu_xchg(p->vm_numa_stat_diff[i], 0);
                                                   >> 779                         if (v) {
                                                   >> 780 
                                                   >> 781                                 atomic_long_add(v, &zone->vm_numa_stat[i]);
                                                   >> 782                                 global_numa_diff[i] += v;
                                                   >> 783                                 __this_cpu_write(p->expire, 3);
                                                   >> 784                         }
                                                   >> 785                 }
835                                                   786 
836                 if (do_pagesets) {                787                 if (do_pagesets) {
837                         cond_resched();           788                         cond_resched();
838                                                << 
839                         changes += decay_pcp_high(zone, this_cpu_ptr(pcp));  << 
840 #ifdef CONFIG_NUMA                             << 
841                         /*                        789                         /*
842                          * Deal with draining     790                          * Deal with draining the remote pageset of this
843                          * processor              791                          * processor
844                          *                        792                          *
845                          * Check if there are     793                          * Check if there are pages remaining in this pageset
846                          * if not then there i    794                          * if not then there is nothing to expire.
847                          */                       795                          */
848                         if (!__this_cpu_read(pcp->expire) ||  !! 796                         if (!__this_cpu_read(p->expire) ||
849                                !__this_cpu_read(pcp->count))  !! 797                                !__this_cpu_read(p->pcp.count))
850                                 continue;         798                                 continue;
851                                                   799 
852                         /*                        800                         /*
853                          * We never drain zone    801                          * We never drain zones local to this processor.
854                          */                       802                          */
855                         if (zone_to_nid(zone)     803                         if (zone_to_nid(zone) == numa_node_id()) {
856                                 __this_cpu_write(pcp->expire, 0);  !! 804                                 __this_cpu_write(p->expire, 0);
857                                 continue;         805                                 continue;
858                         }                         806                         }
859                                                   807 
860                         if (__this_cpu_dec_return(pcp->expire)) {  !! 808                         if (__this_cpu_dec_return(p->expire))
861                                 changes++;     << 
862                                 continue;         809                                 continue;
863                         }                      << 
864                                                   810 
865                         if (__this_cpu_read(pcp->count)) {  !! 811                         if (__this_cpu_read(p->pcp.count)) {
866                                 drain_zone_pages(zone, this_cpu_ptr(pcp));  !! 812                                 drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
867                                 changes++;        813                                 changes++;
868                         }                         814                         }
869 #endif                                         << 
870                 }                                 815                 }
                                                   >> 816 #endif
871         }                                         817         }
872                                                   818 
873         for_each_online_pgdat(pgdat) {            819         for_each_online_pgdat(pgdat) {
874                 struct per_cpu_nodestat __perc    820                 struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats;
875                                                   821 
876                 for (i = 0; i < NR_VM_NODE_STA    822                 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
877                         int v;                    823                         int v;
878                                                   824 
879                         v = this_cpu_xchg(p->v    825                         v = this_cpu_xchg(p->vm_node_stat_diff[i], 0);
880                         if (v) {                  826                         if (v) {
881                                 atomic_long_ad    827                                 atomic_long_add(v, &pgdat->vm_stat[i]);
882                                 global_node_di    828                                 global_node_diff[i] += v;
883                         }                         829                         }
884                 }                                 830                 }
885         }                                         831         }
886                                                   832 
                                                   >> 833 #ifdef CONFIG_NUMA
                                                   >> 834         changes += fold_diff(global_zone_diff, global_numa_diff,
                                                   >> 835                              global_node_diff);
                                                   >> 836 #else
887         changes += fold_diff(global_zone_diff,    837         changes += fold_diff(global_zone_diff, global_node_diff);
                                                   >> 838 #endif
888         return changes;                           839         return changes;
889 }                                                 840 }
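
Taken together, the expire handling above is an idle countdown for remote pagesets: flushing a nonzero diff for a zone re-arms pcp->expire to 3, every do_pagesets pass decrements it, and only once it hits zero with pages still cached does drain_zone_pages() hand them back. At the stat worker's default cadence of roughly one second (sysctl_stat_interval, defined later in this file) that is the "3 seconds idle till flush" promised above.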
890                                                   841 
891 /*                                                842 /*
892  * Fold the data for an offline cpu into the g    843  * Fold the data for an offline cpu into the global array.
893  * There cannot be any access by the offline c    844  * There cannot be any access by the offline cpu and therefore
894  * synchronization is simplified.                 845  * synchronization is simplified.
895  */                                               846  */
896 void cpu_vm_stats_fold(int cpu)                   847 void cpu_vm_stats_fold(int cpu)
897 {                                                 848 {
898         struct pglist_data *pgdat;                849         struct pglist_data *pgdat;
899         struct zone *zone;                        850         struct zone *zone;
900         int i;                                    851         int i;
901         int global_zone_diff[NR_VM_ZONE_STAT_I    852         int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
                                                   >> 853 #ifdef CONFIG_NUMA
                                                   >> 854         int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
                                                   >> 855 #endif
902         int global_node_diff[NR_VM_NODE_STAT_I    856         int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
903                                                   857 
904         for_each_populated_zone(zone) {           858         for_each_populated_zone(zone) {
905                 struct per_cpu_zonestat *pzstats;  !! 859                 struct per_cpu_pageset *p;
906                                                       860 
907                 pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);  !! 861                 p = per_cpu_ptr(zone->pageset, cpu);
908                                                       862 
909                 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {  !! 863                 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
910                         if (pzstats->vm_stat_diff[i]) {  !! 864                         if (p->vm_stat_diff[i]) {
911                                 int v;                    865                                 int v;
912                                                           866 
913                                 v = pzstats->vm_stat_diff[i];  !! 867                                 v = p->vm_stat_diff[i];
914                                 pzstats->vm_stat_diff[i] = 0;  !! 868                                 p->vm_stat_diff[i] = 0;
915                                 atomic_long_ad    869                                 atomic_long_add(v, &zone->vm_stat[i]);
916                                 global_zone_di    870                                 global_zone_diff[i] += v;
917                         }                         871                         }
918                 }                              !! 872 
919 #ifdef CONFIG_NUMA                                873 #ifdef CONFIG_NUMA
920                 for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {  !! 874                 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
921                         if (pzstats->vm_numa_event[i]) {  !! 875                         if (p->vm_numa_stat_diff[i]) {
922                                 unsigned long v;  !! 876                                 int v;
923                                                   !! 877 
924                                 v = pzstats->vm_numa_event[i];  !! 878                                 v = p->vm_numa_stat_diff[i];
925                                 pzstats->vm_numa_event[i] = 0;  !! 879                                 p->vm_numa_stat_diff[i] = 0;
926                                 zone_numa_event_add(v, zone, i);  !! 880                                 atomic_long_add(v, &zone->vm_numa_stat[i]);
                                                   >> 881                                 global_numa_diff[i] += v;
927                         }                         882                         }
928                 }                              << 
929 #endif                                            883 #endif
930         }                                         884         }
931                                                   885 
932         for_each_online_pgdat(pgdat) {            886         for_each_online_pgdat(pgdat) {
933                 struct per_cpu_nodestat *p;       887                 struct per_cpu_nodestat *p;
934                                                   888 
935                 p = per_cpu_ptr(pgdat->per_cpu    889                 p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);
936                                                   890 
937                 for (i = 0; i < NR_VM_NODE_STA    891                 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
938                         if (p->vm_node_stat_di    892                         if (p->vm_node_stat_diff[i]) {
939                                 int v;            893                                 int v;
940                                                   894 
941                                 v = p->vm_node    895                                 v = p->vm_node_stat_diff[i];
942                                 p->vm_node_sta    896                                 p->vm_node_stat_diff[i] = 0;
943                                 atomic_long_ad    897                                 atomic_long_add(v, &pgdat->vm_stat[i]);
944                                 global_node_di    898                                 global_node_diff[i] += v;
945                         }                         899                         }
946         }                                         900         }
947                                                   901 
                                                   >> 902 #ifdef CONFIG_NUMA
                                                   >> 903         fold_diff(global_zone_diff, global_numa_diff, global_node_diff);
                                                   >> 904 #else
948         fold_diff(global_zone_diff, global_nod    905         fold_diff(global_zone_diff, global_node_diff);
                                                   >> 906 #endif
949 }                                                 907 }
950                                                   908 
951 /*                                                909 /*
952  * this is only called if !populated_zone(zone    910  * this is only called if !populated_zone(zone), which implies no other users of
953  * pset->vm_stat_diff[] exist.                 !! 911  * pset->vm_stat_diff[] exsist.
954  */                                               912  */
955 void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats)  !! 913 void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
956 {                                                 914 {
957         unsigned long v;                       << 
958         int i;                                    915         int i;
959                                                   916 
960         for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {  !! 917         for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
961                 if (pzstats->vm_stat_diff[i]) {  !! 918                 if (pset->vm_stat_diff[i]) {
962                         v = pzstats->vm_stat_diff[i];  !! 919                         int v = pset->vm_stat_diff[i];
963                         pzstats->vm_stat_diff[i] = 0;  !! 920                         pset->vm_stat_diff[i] = 0;
964                         zone_page_state_add(v, zone, i);  !! 921                         atomic_long_add(v, &zone->vm_stat[i]);
                                                   >> 922                         atomic_long_add(v, &vm_zone_stat[i]);
965                 }                                 923                 }
966         }                                      << 
967                                                   924 
968 #ifdef CONFIG_NUMA                                925 #ifdef CONFIG_NUMA
969         for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {  !! 926         for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
970                 if (pzstats->vm_numa_event[i]) {  !! 927                 if (pset->vm_numa_stat_diff[i]) {
971                         v = pzstats->vm_numa_event[i];  !! 928                         int v = pset->vm_numa_stat_diff[i];
972                         pzstats->vm_numa_event[i] = 0;  !! 929 
973                         zone_numa_event_add(v, zone, i);  !! 930                         pset->vm_numa_stat_diff[i] = 0;
                                                   >> 931                         atomic_long_add(v, &zone->vm_numa_stat[i]);
                                                   >> 932                         atomic_long_add(v, &vm_numa_stat[i]);
974                 }                                 933                 }
975         }                                      << 
976 #endif                                            934 #endif
977 }                                                 935 }
978 #endif                                            936 #endif
979                                                   937 
980 #ifdef CONFIG_NUMA                                938 #ifdef CONFIG_NUMA
                                                   >> 939 void __inc_numa_state(struct zone *zone,
                                                   >> 940                                  enum numa_stat_item item)
                                                   >> 941 {
                                                   >> 942         struct per_cpu_pageset __percpu *pcp = zone->pageset;
                                                   >> 943         u16 __percpu *p = pcp->vm_numa_stat_diff + item;
                                                   >> 944         u16 v;
                                                   >> 945 
                                                   >> 946         v = __this_cpu_inc_return(*p);
                                                   >> 947 
                                                   >> 948         if (unlikely(v > NUMA_STATS_THRESHOLD)) {
                                                   >> 949                 zone_numa_state_add(v, zone, item);
                                                   >> 950                 __this_cpu_write(*p, 0);
                                                   >> 951         }
                                                   >> 952 }
                                                   >> 953 
981 /*                                                954 /*
982  * Determine the per node value of a stat item    955  * Determine the per node value of a stat item. This function
983  * is called frequently in a NUMA machine, so     956  * is called frequently in a NUMA machine, so try to be as
984  * frugal as possible.                            957  * frugal as possible.
985  */                                               958  */
986 unsigned long sum_zone_node_page_state(int nod    959 unsigned long sum_zone_node_page_state(int node,
987                                  enum zone_sta    960                                  enum zone_stat_item item)
988 {                                                 961 {
989         struct zone *zones = NODE_DATA(node)->    962         struct zone *zones = NODE_DATA(node)->node_zones;
990         int i;                                    963         int i;
991         unsigned long count = 0;                  964         unsigned long count = 0;
992                                                   965 
993         for (i = 0; i < MAX_NR_ZONES; i++)        966         for (i = 0; i < MAX_NR_ZONES; i++)
994                 count += zone_page_state(zones    967                 count += zone_page_state(zones + i, item);
995                                                   968 
996         return count;                             969         return count;
997 }                                                 970 }
998                                                   971 
999 /* Determine the per node value of a numa stat item. */  !! 972 /*
1000 unsigned long sum_zone_numa_event_state(int node,        !! 973  * Determine the per node value of a numa stat item. To avoid deviation,
                                                   >> 974  * the per cpu stat number in vm_numa_stat_diff[] is also included.
                                                   >> 975  */
                                                   >> 976 unsigned long sum_zone_numa_state(int node,
1001                                  enum numa_st    977                                  enum numa_stat_item item)
1002 {                                                978 {
1003         struct zone *zones = NODE_DATA(node)-    979         struct zone *zones = NODE_DATA(node)->node_zones;
1004         unsigned long count = 0;              << 
1005         int i;                                   980         int i;
                                                   >> 981         unsigned long count = 0;
1006                                                  982 
1007         for (i = 0; i < MAX_NR_ZONES; i++)       983         for (i = 0; i < MAX_NR_ZONES; i++)
1008                 count += zone_numa_event_state(zones + i, item);  !! 984                 count += zone_numa_state_snapshot(zones + i, item);
1009                                                  985 
1010         return count;                            986         return count;
1011 }                                                987 }
1012                                                  988 
1013 /*                                               989 /*
1014  * Determine the per node value of a stat ite    990  * Determine the per node value of a stat item.
1015  */                                              991  */
1016 unsigned long node_page_state_pages(struct pglist_data *pgdat,  !! 992 unsigned long node_page_state(struct pglist_data *pgdat,
1017                                     enum node_stat_item item)   !! 993                                 enum node_stat_item item)
1018 {                                                994 {
1019         long x = atomic_long_read(&pgdat->vm_    995         long x = atomic_long_read(&pgdat->vm_stat[item]);
1020 #ifdef CONFIG_SMP                                996 #ifdef CONFIG_SMP
1021         if (x < 0)                               997         if (x < 0)
1022                 x = 0;                           998                 x = 0;
1023 #endif                                           999 #endif
1024         return x;                                1000         return x;
1025 }                                                1001 }
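
The clamp to zero under CONFIG_SMP is deliberate: per-cpu diffs that have not been folded yet can transiently drive the global atomic negative, and rather than hand readers an impossible negative page count, the accessor rounds up to zero.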
1026                                               << 
1027 unsigned long node_page_state(struct pglist_data *pgdat,  <<
1028                               enum node_stat_item item)   <<
1029 {                                             << 
1030         VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));  <<
1031                                               << 
1032         return node_page_state_pages(pgdat, item);    <<
1033 }                                             << 
1034 #endif                                           1002 #endif
1035                                                  1003 
1036 /*                                            << 
1037  * Count number of pages "struct page" and "struct page_ext" consume.  <<
1038  * nr_memmap_boot_pages: # of pages allocated by boot allocator        <<
1039  * nr_memmap_pages: # of pages that were allocated by buddy allocator  <<
1040  */                                           << 
1041 static atomic_long_t nr_memmap_boot_pages = ATOMIC_LONG_INIT(0);  <<
1042 static atomic_long_t nr_memmap_pages = ATOMIC_LONG_INIT(0);       <<
1043                                               << 
1044 void memmap_boot_pages_add(long delta)        << 
1045 {                                             << 
1046         atomic_long_add(delta, &nr_memmap_boot_pages);  <<
1047 }                                             << 
1048                                               << 
1049 void memmap_pages_add(long delta)             << 
1050 {                                             << 
1051         atomic_long_add(delta, &nr_memmap_pages);  <<
1052 }                                             << 
1053                                               << 
1054 #ifdef CONFIG_COMPACTION                         1004 #ifdef CONFIG_COMPACTION
1055                                                  1005 
1056 struct contig_page_info {                        1006 struct contig_page_info {
1057         unsigned long free_pages;                1007         unsigned long free_pages;
1058         unsigned long free_blocks_total;         1008         unsigned long free_blocks_total;
1059         unsigned long free_blocks_suitable;      1009         unsigned long free_blocks_suitable;
1060 };                                               1010 };
1061                                                  1011 
1062 /*                                               1012 /*
1063  * Calculate the number of free pages in a zo    1013  * Calculate the number of free pages in a zone, how many contiguous
1064  * pages are free and how many are large enou    1014  * pages are free and how many are large enough to satisfy an allocation of
1065  * the target size. Note that this function m    1015  * the target size. Note that this function makes no attempt to estimate
1066  * how many suitable free blocks there *might    1016  * how many suitable free blocks there *might* be if MOVABLE pages were
1067  * migrated. Calculating that is possible, bu    1017  * migrated. Calculating that is possible, but expensive and can be
1068  * figured out from userspace.                   1018  * figured out from userspace.
1069  */                                              1019  */
1070 static void fill_contig_page_info(struct zone    1020 static void fill_contig_page_info(struct zone *zone,
1071                                 unsigned int     1021                                 unsigned int suitable_order,
1072                                 struct contig    1022                                 struct contig_page_info *info)
1073 {                                                1023 {
1074         unsigned int order;                      1024         unsigned int order;
1075                                                  1025 
1076         info->free_pages = 0;                    1026         info->free_pages = 0;
1077         info->free_blocks_total = 0;             1027         info->free_blocks_total = 0;
1078         info->free_blocks_suitable = 0;          1028         info->free_blocks_suitable = 0;
1079                                                  1029 
1080         for (order = 0; order < NR_PAGE_ORDERS; order++) {  !! 1030         for (order = 0; order < MAX_ORDER; order++) {
1081                 unsigned long blocks;            1031                 unsigned long blocks;
1082                                                  1032 
1083                 /*                            !! 1033                 /* Count number of free blocks */
1084                  * Count number of free blocks.  !! 1034                 blocks = zone->free_area[order].nr_free;
1085                  *                            << 
1086                  * Access to nr_free is lockless as nr_free is used only for  << 
1087                  * diagnostic purposes. Use data_race to avoid KCSAN splat.   << 
1088                  */                           << 
1089                 blocks = data_race(zone->free_area[order].nr_free);  << 
1090                 info->free_blocks_total += bl    1035                 info->free_blocks_total += blocks;
1091                                                  1036 
1092                 /* Count free base pages */      1037                 /* Count free base pages */
1093                 info->free_pages += blocks <<    1038                 info->free_pages += blocks << order;
1094                                                  1039 
1095                 /* Count the suitable free bl    1040                 /* Count the suitable free blocks */
1096                 if (order >= suitable_order)     1041                 if (order >= suitable_order)
1097                         info->free_blocks_sui    1042                         info->free_blocks_suitable += blocks <<
1098                                                  1043                                                 (order - suitable_order);
1099         }                                        1044         }
1100 }                                                1045 }
1101                                                  1046 
1102 /*                                               1047 /*
1103  * A fragmentation index only makes sense if     1048  * A fragmentation index only makes sense if an allocation of a requested
1104  * size would fail. If that is true, the frag    1049  * size would fail. If that is true, the fragmentation index indicates
1105  * whether external fragmentation or a lack o    1050  * whether external fragmentation or a lack of memory was the problem.
1106  * The value can be used to determine if page    1051  * The value can be used to determine if page reclaim or compaction
1107  * should be used.                               1052  * should be used.
1108  */                                              1053  */
1109 static int __fragmentation_index(unsigned int    1054 static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
1110 {                                                1055 {
1111         unsigned long requested = 1UL << orde    1056         unsigned long requested = 1UL << order;
1112                                                  1057 
1113         if (WARN_ON_ONCE(order > MAX_PAGE_ORDER))  !! 1058         if (WARN_ON_ONCE(order >= MAX_ORDER))
1114                 return 0;                        1059                 return 0;
1115                                                  1060 
1116         if (!info->free_blocks_total)            1061         if (!info->free_blocks_total)
1117                 return 0;                        1062                 return 0;
1118                                                  1063 
1119         /* Fragmentation index only makes sen    1064         /* Fragmentation index only makes sense when a request would fail */
1120         if (info->free_blocks_suitable)          1065         if (info->free_blocks_suitable)
1121                 return -1000;                    1066                 return -1000;
1122                                                  1067 
1123         /*                                       1068         /*
1124          * Index is between 0 and 1 so return    1069          * Index is between 0 and 1 so return within 3 decimal places
1125          *                                       1070          *
1126          * 0 => allocation would fail due to     1071          * 0 => allocation would fail due to lack of memory
1127          * 1 => allocation would fail due to     1072          * 1 => allocation would fail due to fragmentation
1128          */                                      1073          */
1129         return 1000 - div_u64( (1000+(div_u64    1074         return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total);
1130 }                                                1075 }
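
A worked example with hypothetical numbers makes the formula concrete:

/*
 * Hypothetical zone, order = 4, so requested = 16 pages. Suppose
 * free_pages = 80 with all of it in order-0 blocks, giving
 * free_blocks_total = 80 and free_blocks_suitable = 0. Then
 *
 *	index = 1000 - (1000 + 80 * 1000 / 16) / 80
 *	      = 1000 - 6000 / 80
 *	      = 925
 *
 * 0.925 is close to 1: memory is plentiful but fragmented, so
 * compaction rather than reclaim is the sensible response.
 */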
1131                                                  1076 
1132 /*                                            << 
1133  * Calculates external fragmentation within a zone wrt the given order.  <<
1134  * It is defined as the percentage of pages found in blocks of size      <<
1135  * less than 1 << order. It returns values in range [0, 100].            <<
1136  */                                           << 
1137 unsigned int extfrag_for_order(struct zone *zone, unsigned int order)    <<
1138 {                                             << 
1139         struct contig_page_info info;         << 
1140                                               << 
1141         fill_contig_page_info(zone, order, &info);  <<
1142         if (info.free_pages == 0)             << 
1143                 return 0;                     << 
1144                                               << 
1145         return div_u64((info.free_pages -     << 
1146                         (info.free_blocks_suitable << order)) * 100,  <<
1147                         info.free_pages);     << 
1148 }                                             << 
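
The percentage form can be worked through with the same hypothetical zone:

/*
 * Order = 4, free_pages = 80, free_blocks_suitable = 2: the suitable
 * blocks hold 2 << 4 = 32 pages, so
 *
 *	extfrag = (80 - 32) * 100 / 80 = 60
 *
 * i.e. 60% of the free pages sit in blocks too small to satisfy an
 * order-4 request.
 */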
1149                                               << 
1150 /* Same as __fragmentation index but allocs c    1077 /* Same as __fragmentation index but allocs contig_page_info on stack */
1151 int fragmentation_index(struct zone *zone, un    1078 int fragmentation_index(struct zone *zone, unsigned int order)
1152 {                                                1079 {
1153         struct contig_page_info info;            1080         struct contig_page_info info;
1154                                                  1081 
1155         fill_contig_page_info(zone, order, &i    1082         fill_contig_page_info(zone, order, &info);
1156         return __fragmentation_index(order, &    1083         return __fragmentation_index(order, &info);
1157 }                                                1084 }
1158 #endif                                           1085 #endif
1159                                                  1086 
1160 #if defined(CONFIG_PROC_FS) || defined(CONFIG    1087 #if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || \
1161     defined(CONFIG_NUMA) || defined(CONFIG_ME    1088     defined(CONFIG_NUMA) || defined(CONFIG_MEMCG)
1162 #ifdef CONFIG_ZONE_DMA                           1089 #ifdef CONFIG_ZONE_DMA
1163 #define TEXT_FOR_DMA(xx) xx "_dma",              1090 #define TEXT_FOR_DMA(xx) xx "_dma",
1164 #else                                            1091 #else
1165 #define TEXT_FOR_DMA(xx)                         1092 #define TEXT_FOR_DMA(xx)
1166 #endif                                           1093 #endif
1167                                                  1094 
1168 #ifdef CONFIG_ZONE_DMA32                         1095 #ifdef CONFIG_ZONE_DMA32
1169 #define TEXT_FOR_DMA32(xx) xx "_dma32",          1096 #define TEXT_FOR_DMA32(xx) xx "_dma32",
1170 #else                                            1097 #else
1171 #define TEXT_FOR_DMA32(xx)                       1098 #define TEXT_FOR_DMA32(xx)
1172 #endif                                           1099 #endif
1173                                                  1100 
1174 #ifdef CONFIG_HIGHMEM                            1101 #ifdef CONFIG_HIGHMEM
1175 #define TEXT_FOR_HIGHMEM(xx) xx "_high",         1102 #define TEXT_FOR_HIGHMEM(xx) xx "_high",
1176 #else                                            1103 #else
1177 #define TEXT_FOR_HIGHMEM(xx)                     1104 #define TEXT_FOR_HIGHMEM(xx)
1178 #endif                                           1105 #endif
1179                                                  1106 
1180 #ifdef CONFIG_ZONE_DEVICE                     << 
1181 #define TEXT_FOR_DEVICE(xx) xx "_device",     << 
1182 #else                                         << 
1183 #define TEXT_FOR_DEVICE(xx)                   << 
1184 #endif                                        << 
1185                                               << 
1186 #define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx)     1107 #define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
1187                                         TEXT_FOR_HIGHMEM(xx) xx "_movable", \  !! 1108                                         TEXT_FOR_HIGHMEM(xx) xx "_movable",
1188                                         TEXT_FOR_DEVICE(xx)  <<
1189                                                  1109 
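
For orientation, one expansion example (all optional zones enabled):

/*
 * TEXTS_FOR_ZONES("pgalloc") expands to the six strings
 *	"pgalloc_dma", "pgalloc_dma32", "pgalloc_normal",
 *	"pgalloc_high", "pgalloc_movable", "pgalloc_device",
 * and any zone compiled out drops its entry, keeping these name
 * arrays in lockstep with the per-zone counter layout.
 */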
1190 const char * const vmstat_text[] = {             1110 const char * const vmstat_text[] = {
1191         /* enum zone_stat_item counters */       1111         /* enum zone_stat_item counters */
1192         "nr_free_pages",                         1112         "nr_free_pages",
1193         "nr_zone_inactive_anon",                 1113         "nr_zone_inactive_anon",
1194         "nr_zone_active_anon",                   1114         "nr_zone_active_anon",
1195         "nr_zone_inactive_file",                 1115         "nr_zone_inactive_file",
1196         "nr_zone_active_file",                   1116         "nr_zone_active_file",
1197         "nr_zone_unevictable",                   1117         "nr_zone_unevictable",
1198         "nr_zone_write_pending",                 1118         "nr_zone_write_pending",
1199         "nr_mlock",                              1119         "nr_mlock",
                                                   >> 1120         "nr_page_table_pages",
                                                   >> 1121         "nr_kernel_stack",
                                                   >> 1122 #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
                                                   >> 1123         "nr_shadow_call_stack",
                                                   >> 1124 #endif
1200         "nr_bounce",                             1125         "nr_bounce",
1201 #if IS_ENABLED(CONFIG_ZSMALLOC)                  1126 #if IS_ENABLED(CONFIG_ZSMALLOC)
1202         "nr_zspages",                            1127         "nr_zspages",
1203 #endif                                           1128 #endif
1204         "nr_free_cma",                           1129         "nr_free_cma",
1205 #ifdef CONFIG_UNACCEPTED_MEMORY               << 
1206         "nr_unaccepted",                      << 
1207 #endif                                        << 
1208                                                  1130 
1209         /* enum numa_stat_item counters */       1131         /* enum numa_stat_item counters */
1210 #ifdef CONFIG_NUMA                               1132 #ifdef CONFIG_NUMA
1211         "numa_hit",                              1133         "numa_hit",
1212         "numa_miss",                             1134         "numa_miss",
1213         "numa_foreign",                          1135         "numa_foreign",
1214         "numa_interleave",                       1136         "numa_interleave",
1215         "numa_local",                            1137         "numa_local",
1216         "numa_other",                            1138         "numa_other",
1217 #endif                                           1139 #endif
1218                                                  1140 
1219         /* enum node_stat_item counters */       1141         /* enum node_stat_item counters */
1220         "nr_inactive_anon",                      1142         "nr_inactive_anon",
1221         "nr_active_anon",                        1143         "nr_active_anon",
1222         "nr_inactive_file",                      1144         "nr_inactive_file",
1223         "nr_active_file",                        1145         "nr_active_file",
1224         "nr_unevictable",                        1146         "nr_unevictable",
1225         "nr_slab_reclaimable",                   1147         "nr_slab_reclaimable",
1226         "nr_slab_unreclaimable",                 1148         "nr_slab_unreclaimable",
1227         "nr_isolated_anon",                      1149         "nr_isolated_anon",
1228         "nr_isolated_file",                      1150         "nr_isolated_file",
1229         "workingset_nodes",                      1151         "workingset_nodes",
1230         "workingset_refault_anon",            !! 1152         "workingset_refault",
1231         "workingset_refault_file",            !! 1153         "workingset_activate",
1232         "workingset_activate_anon",           !! 1154         "workingset_restore",
1233         "workingset_activate_file",           << 
1234         "workingset_restore_anon",            << 
1235         "workingset_restore_file",            << 
1236         "workingset_nodereclaim",                1155         "workingset_nodereclaim",
1237         "nr_anon_pages",                         1156         "nr_anon_pages",
1238         "nr_mapped",                             1157         "nr_mapped",
1239         "nr_file_pages",                         1158         "nr_file_pages",
1240         "nr_dirty",                              1159         "nr_dirty",
1241         "nr_writeback",                          1160         "nr_writeback",
1242         "nr_writeback_temp",                     1161         "nr_writeback_temp",
1243         "nr_shmem",                              1162         "nr_shmem",
1244         "nr_shmem_hugepages",                    1163         "nr_shmem_hugepages",
1245         "nr_shmem_pmdmapped",                    1164         "nr_shmem_pmdmapped",
1246         "nr_file_hugepages",                     1165         "nr_file_hugepages",
1247         "nr_file_pmdmapped",                     1166         "nr_file_pmdmapped",
1248         "nr_anon_transparent_hugepages",         1167         "nr_anon_transparent_hugepages",
1249         "nr_vmscan_write",                       1168         "nr_vmscan_write",
1250         "nr_vmscan_immediate_reclaim",           1169         "nr_vmscan_immediate_reclaim",
1251         "nr_dirtied",                            1170         "nr_dirtied",
1252         "nr_written",                            1171         "nr_written",
1253         "nr_throttled_written",               << 
1254         "nr_kernel_misc_reclaimable",            1172         "nr_kernel_misc_reclaimable",
1255         "nr_foll_pin_acquired",                  1173         "nr_foll_pin_acquired",
1256         "nr_foll_pin_released",                  1174         "nr_foll_pin_released",
1257         "nr_kernel_stack",                    !! 1175 
1258 #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)      !! 1176         /* enum writeback_stat_item counters */
1259         "nr_shadow_call_stack",               << 
1260 #endif                                        << 
1261         "nr_page_table_pages",                << 
1262         "nr_sec_page_table_pages",            << 
1263 #ifdef CONFIG_IOMMU_SUPPORT                   << 
1264         "nr_iommu_pages",                     << 
1265 #endif                                        << 
1266 #ifdef CONFIG_SWAP                            << 
1267         "nr_swapcached",                      << 
1268 #endif                                        << 
1269 #ifdef CONFIG_NUMA_BALANCING                  << 
1270         "pgpromote_success",                  << 
1271         "pgpromote_candidate",                << 
1272 #endif                                        << 
1273         "pgdemote_kswapd",                    << 
1274         "pgdemote_direct",                    << 
1275         "pgdemote_khugepaged",                << 
1276         /* system-wide enum vm_stat_item coun << 
1277         "nr_dirty_threshold",                    1177         "nr_dirty_threshold",
1278         "nr_dirty_background_threshold",         1178         "nr_dirty_background_threshold",
1279         "nr_memmap_pages",                    << 
1280         "nr_memmap_boot_pages",               << 
1281                                                  1179 
1282 #if defined(CONFIG_VM_EVENT_COUNTERS) || defi    1180 #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
1283         /* enum vm_event_item counters */        1181         /* enum vm_event_item counters */
1284         "pgpgin",                                1182         "pgpgin",
1285         "pgpgout",                               1183         "pgpgout",
1286         "pswpin",                                1184         "pswpin",
1287         "pswpout",                               1185         "pswpout",
1288                                                  1186 
1289         TEXTS_FOR_ZONES("pgalloc")               1187         TEXTS_FOR_ZONES("pgalloc")
1290         TEXTS_FOR_ZONES("allocstall")            1188         TEXTS_FOR_ZONES("allocstall")
1291         TEXTS_FOR_ZONES("pgskip")                1189         TEXTS_FOR_ZONES("pgskip")
1292                                                  1190 
1293         "pgfree",                                1191         "pgfree",
1294         "pgactivate",                            1192         "pgactivate",
1295         "pgdeactivate",                          1193         "pgdeactivate",
1296         "pglazyfree",                            1194         "pglazyfree",
1297                                                  1195 
1298         "pgfault",                               1196         "pgfault",
1299         "pgmajfault",                            1197         "pgmajfault",
1300         "pglazyfreed",                           1198         "pglazyfreed",
1301                                                  1199 
1302         "pgrefill",                              1200         "pgrefill",
1303         "pgreuse",                            << 
1304         "pgsteal_kswapd",                        1201         "pgsteal_kswapd",
1305         "pgsteal_direct",                        1202         "pgsteal_direct",
1306         "pgsteal_khugepaged",                 << 
1307         "pgscan_kswapd",                         1203         "pgscan_kswapd",
1308         "pgscan_direct",                         1204         "pgscan_direct",
1309         "pgscan_khugepaged",                  << 
1310         "pgscan_direct_throttle",                1205         "pgscan_direct_throttle",
1311         "pgscan_anon",                           1206         "pgscan_anon",
1312         "pgscan_file",                           1207         "pgscan_file",
1313         "pgsteal_anon",                          1208         "pgsteal_anon",
1314         "pgsteal_file",                          1209         "pgsteal_file",
1315                                                  1210 
1316 #ifdef CONFIG_NUMA                               1211 #ifdef CONFIG_NUMA
1317         "zone_reclaim_success",               << 
1318         "zone_reclaim_failed",                   1212         "zone_reclaim_failed",
1319 #endif                                           1213 #endif
1320         "pginodesteal",                          1214         "pginodesteal",
1321         "slabs_scanned",                         1215         "slabs_scanned",
1322         "kswapd_inodesteal",                     1216         "kswapd_inodesteal",
1323         "kswapd_low_wmark_hit_quickly",          1217         "kswapd_low_wmark_hit_quickly",
1324         "kswapd_high_wmark_hit_quickly",         1218         "kswapd_high_wmark_hit_quickly",
1325         "pageoutrun",                            1219         "pageoutrun",
1326                                                  1220 
1327         "pgrotated",                             1221         "pgrotated",
1328                                                  1222 
1329         "drop_pagecache",                        1223         "drop_pagecache",
1330         "drop_slab",                             1224         "drop_slab",
1331         "oom_kill",                              1225         "oom_kill",
1332                                                  1226 
1333 #ifdef CONFIG_NUMA_BALANCING                     1227 #ifdef CONFIG_NUMA_BALANCING
1334         "numa_pte_updates",                      1228         "numa_pte_updates",
1335         "numa_huge_pte_updates",                 1229         "numa_huge_pte_updates",
1336         "numa_hint_faults",                      1230         "numa_hint_faults",
1337         "numa_hint_faults_local",                1231         "numa_hint_faults_local",
1338         "numa_pages_migrated",                   1232         "numa_pages_migrated",
1339 #endif                                           1233 #endif
1340 #ifdef CONFIG_MIGRATION                          1234 #ifdef CONFIG_MIGRATION
1341         "pgmigrate_success",                     1235         "pgmigrate_success",
1342         "pgmigrate_fail",                        1236         "pgmigrate_fail",
1343         "thp_migration_success",              << 
1344         "thp_migration_fail",                 << 
1345         "thp_migration_split",                << 
1346 #endif                                           1237 #endif
1347 #ifdef CONFIG_COMPACTION                         1238 #ifdef CONFIG_COMPACTION
1348         "compact_migrate_scanned",               1239         "compact_migrate_scanned",
1349         "compact_free_scanned",                  1240         "compact_free_scanned",
1350         "compact_isolated",                      1241         "compact_isolated",
1351         "compact_stall",                         1242         "compact_stall",
1352         "compact_fail",                          1243         "compact_fail",
1353         "compact_success",                       1244         "compact_success",
1354         "compact_daemon_wake",                   1245         "compact_daemon_wake",
1355         "compact_daemon_migrate_scanned",        1246         "compact_daemon_migrate_scanned",
1356         "compact_daemon_free_scanned",           1247         "compact_daemon_free_scanned",
1357 #endif                                           1248 #endif
1358                                                  1249 
1359 #ifdef CONFIG_HUGETLB_PAGE                       1250 #ifdef CONFIG_HUGETLB_PAGE
1360         "htlb_buddy_alloc_success",              1251         "htlb_buddy_alloc_success",
1361         "htlb_buddy_alloc_fail",                 1252         "htlb_buddy_alloc_fail",
1362 #endif                                           1253 #endif
1363 #ifdef CONFIG_CMA                             << 
1364         "cma_alloc_success",                  << 
1365         "cma_alloc_fail",                     << 
1366 #endif                                        << 
1367         "unevictable_pgs_culled",                1254         "unevictable_pgs_culled",
1368         "unevictable_pgs_scanned",               1255         "unevictable_pgs_scanned",
1369         "unevictable_pgs_rescued",               1256         "unevictable_pgs_rescued",
1370         "unevictable_pgs_mlocked",               1257         "unevictable_pgs_mlocked",
1371         "unevictable_pgs_munlocked",             1258         "unevictable_pgs_munlocked",
1372         "unevictable_pgs_cleared",               1259         "unevictable_pgs_cleared",
1373         "unevictable_pgs_stranded",              1260         "unevictable_pgs_stranded",
1374                                                  1261 
1375 #ifdef CONFIG_TRANSPARENT_HUGEPAGE               1262 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1376         "thp_fault_alloc",                       1263         "thp_fault_alloc",
1377         "thp_fault_fallback",                    1264         "thp_fault_fallback",
1378         "thp_fault_fallback_charge",             1265         "thp_fault_fallback_charge",
1379         "thp_collapse_alloc",                    1266         "thp_collapse_alloc",
1380         "thp_collapse_alloc_failed",             1267         "thp_collapse_alloc_failed",
1381         "thp_file_alloc",                        1268         "thp_file_alloc",
1382         "thp_file_fallback",                     1269         "thp_file_fallback",
1383         "thp_file_fallback_charge",              1270         "thp_file_fallback_charge",
1384         "thp_file_mapped",                       1271         "thp_file_mapped",
1385         "thp_split_page",                        1272         "thp_split_page",
1386         "thp_split_page_failed",                 1273         "thp_split_page_failed",
1387         "thp_deferred_split_page",               1274         "thp_deferred_split_page",
1388         "thp_underused_split_page",           << 
1389         "thp_split_pmd",                         1275         "thp_split_pmd",
1390         "thp_scan_exceed_none_pte",           << 
1391         "thp_scan_exceed_swap_pte",           << 
1392         "thp_scan_exceed_share_pte",          << 
1393 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_    1276 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
1394         "thp_split_pud",                         1277         "thp_split_pud",
1395 #endif                                           1278 #endif
1396         "thp_zero_page_alloc",                   1279         "thp_zero_page_alloc",
1397         "thp_zero_page_alloc_failed",            1280         "thp_zero_page_alloc_failed",
1398         "thp_swpout",                            1281         "thp_swpout",
1399         "thp_swpout_fallback",                   1282         "thp_swpout_fallback",
1400 #endif                                           1283 #endif
1401 #ifdef CONFIG_MEMORY_BALLOON                     1284 #ifdef CONFIG_MEMORY_BALLOON
1402         "balloon_inflate",                       1285         "balloon_inflate",
1403         "balloon_deflate",                       1286         "balloon_deflate",
1404 #ifdef CONFIG_BALLOON_COMPACTION                 1287 #ifdef CONFIG_BALLOON_COMPACTION
1405         "balloon_migrate",                       1288         "balloon_migrate",
1406 #endif                                           1289 #endif
1407 #endif /* CONFIG_MEMORY_BALLOON */               1290 #endif /* CONFIG_MEMORY_BALLOON */
1408 #ifdef CONFIG_DEBUG_TLBFLUSH                     1291 #ifdef CONFIG_DEBUG_TLBFLUSH
1409         "nr_tlb_remote_flush",                   1292         "nr_tlb_remote_flush",
1410         "nr_tlb_remote_flush_received",          1293         "nr_tlb_remote_flush_received",
1411         "nr_tlb_local_flush_all",                1294         "nr_tlb_local_flush_all",
1412         "nr_tlb_local_flush_one",                1295         "nr_tlb_local_flush_one",
1413 #endif /* CONFIG_DEBUG_TLBFLUSH */               1296 #endif /* CONFIG_DEBUG_TLBFLUSH */
1414                                                  1297 
                                                   >> 1298 #ifdef CONFIG_DEBUG_VM_VMACACHE
                                                   >> 1299         "vmacache_find_calls",
                                                   >> 1300         "vmacache_find_hits",
                                                   >> 1301 #endif
1415 #ifdef CONFIG_SWAP                               1302 #ifdef CONFIG_SWAP
1416         "swap_ra",                               1303         "swap_ra",
1417         "swap_ra_hit",                           1304         "swap_ra_hit",
1418 #ifdef CONFIG_KSM                             << 
1419         "ksm_swpin_copy",                     << 
1420 #endif                                        << 
1421 #endif                                        << 
1422 #ifdef CONFIG_KSM                             << 
1423         "cow_ksm",                            << 
1424 #endif                                        << 
1425 #ifdef CONFIG_ZSWAP                           << 
1426         "zswpin",                             << 
1427         "zswpout",                            << 
1428         "zswpwb",                             << 
1429 #endif                                        << 
1430 #ifdef CONFIG_X86                             << 
1431         "direct_map_level2_splits",           << 
1432         "direct_map_level3_splits",           << 
1433 #endif                                        << 
1434 #ifdef CONFIG_PER_VMA_LOCK_STATS              << 
1435         "vma_lock_success",                   << 
1436         "vma_lock_abort",                     << 
1437         "vma_lock_retry",                     << 
1438         "vma_lock_miss",                      << 
1439 #endif                                        << 
1440 #ifdef CONFIG_DEBUG_STACK_USAGE               << 
1441         "kstack_1k",                          << 
1442 #if THREAD_SIZE > 1024                        << 
1443         "kstack_2k",                          << 
1444 #endif                                        << 
1445 #if THREAD_SIZE > 2048                        << 
1446         "kstack_4k",                          << 
1447 #endif                                        << 
1448 #if THREAD_SIZE > 4096                        << 
1449         "kstack_8k",                          << 
1450 #endif                                        << 
1451 #if THREAD_SIZE > 8192                        << 
1452         "kstack_16k",                         << 
1453 #endif                                        << 
1454 #if THREAD_SIZE > 16384                       << 
1455         "kstack_32k",                         << 
1456 #endif                                        << 
1457 #if THREAD_SIZE > 32768                       << 
1458         "kstack_64k",                         << 
1459 #endif                                        << 
1460 #if THREAD_SIZE > 65536                       << 
1461         "kstack_rest",                        << 
1462 #endif                                        << 
1463 #endif                                           1305 #endif
1464 #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_    1306 #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
1465 };                                               1307 };
1466 #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || C    1308 #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */
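
A note on the table above: each TEXTS_FOR_ZONES(xx) entry expands to one counter name per compiled-in zone, so a single table line stands for several /proc/vmstat rows. A minimal sketch of the expansion for one entry, assuming a typical x86_64 configuration with CONFIG_ZONE_DMA and CONFIG_ZONE_DMA32 set and no CONFIG_HIGHMEM (the exact list is config-dependent):

	/* TEXTS_FOR_ZONES("pgalloc") expands roughly to: */
	"pgalloc_dma", "pgalloc_dma32", "pgalloc_normal", "pgalloc_movable",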
1467                                                  1309 
1468 #if (defined(CONFIG_DEBUG_FS) && defined(CONF    1310 #if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
1469      defined(CONFIG_PROC_FS)                     1311      defined(CONFIG_PROC_FS)
1470 static void *frag_start(struct seq_file *m, l    1312 static void *frag_start(struct seq_file *m, loff_t *pos)
1471 {                                                1313 {
1472         pg_data_t *pgdat;                        1314         pg_data_t *pgdat;
1473         loff_t node = *pos;                      1315         loff_t node = *pos;
1474                                                  1316 
1475         for (pgdat = first_online_pgdat();       1317         for (pgdat = first_online_pgdat();
1476              pgdat && node;                      1318              pgdat && node;
1477              pgdat = next_online_pgdat(pgdat)    1319              pgdat = next_online_pgdat(pgdat))
1478                 --node;                          1320                 --node;
1479                                                  1321 
1480         return pgdat;                            1322         return pgdat;
1481 }                                                1323 }
1482                                                  1324 
1483 static void *frag_next(struct seq_file *m, vo    1325 static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
1484 {                                                1326 {
1485         pg_data_t *pgdat = (pg_data_t *)arg;     1327         pg_data_t *pgdat = (pg_data_t *)arg;
1486                                                  1328 
1487         (*pos)++;                                1329         (*pos)++;
1488         return next_online_pgdat(pgdat);         1330         return next_online_pgdat(pgdat);
1489 }                                                1331 }
1490                                                  1332 
1491 static void frag_stop(struct seq_file *m, voi    1333 static void frag_stop(struct seq_file *m, void *arg)
1492 {                                                1334 {
1493 }                                                1335 }
1494                                                  1336 
1495 /*                                               1337 /*
1496  * Walk zones in a node and print using a cal    1338  * Walk zones in a node and print using a callback.
1497  * If @assert_populated is true, only use cal    1339  * If @assert_populated is true, only use callback for zones that are populated.
1498  */                                              1340  */
1499 static void walk_zones_in_node(struct seq_fil    1341 static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
1500                 bool assert_populated, bool n    1342                 bool assert_populated, bool nolock,
1501                 void (*print)(struct seq_file    1343                 void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
1502 {                                                1344 {
1503         struct zone *zone;                       1345         struct zone *zone;
1504         struct zone *node_zones = pgdat->node    1346         struct zone *node_zones = pgdat->node_zones;
1505         unsigned long flags;                     1347         unsigned long flags;
1506                                                  1348 
1507         for (zone = node_zones; zone - node_z    1349         for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
1508                 if (assert_populated && !popu    1350                 if (assert_populated && !populated_zone(zone))
1509                         continue;                1351                         continue;
1510                                                  1352 
1511                 if (!nolock)                     1353                 if (!nolock)
1512                         spin_lock_irqsave(&zo    1354                         spin_lock_irqsave(&zone->lock, flags);
1513                 print(m, pgdat, zone);           1355                 print(m, pgdat, zone);
1514                 if (!nolock)                     1356                 if (!nolock)
1515                         spin_unlock_irqrestor    1357                         spin_unlock_irqrestore(&zone->lock, flags);
1516         }                                        1358         }
1517 }                                                1359 }
1518 #endif                                           1360 #endif
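
walk_zones_in_node() above is the shared iterator behind the /proc files that follow: a callback receives only (m, pgdat, zone) and, unless nolock is true, runs with zone->lock held and interrupts disabled. A hypothetical callback to illustrate the contract (zone_name_print is invented for this sketch):

	static void zone_name_print(struct seq_file *m, pg_data_t *pgdat,
				    struct zone *zone)
	{
		/* zone->lock is already held here when nolock == false */
		seq_printf(m, "Node %d: %s\n", pgdat->node_id, zone->name);
	}

	/* walk_zones_in_node(m, pgdat, true, false, zone_name_print); */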
1519                                                  1361 
1520 #ifdef CONFIG_PROC_FS                            1362 #ifdef CONFIG_PROC_FS
1521 static void frag_show_print(struct seq_file *    1363 static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
1522                                                  1364                                                 struct zone *zone)
1523 {                                                1365 {
1524         int order;                               1366         int order;
1525                                                  1367 
1526         seq_printf(m, "Node %d, zone %8s ", p    1368         seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1527         for (order = 0; order < NR_PAGE_ORDER !! 1369         for (order = 0; order < MAX_ORDER; ++order)
1528                 /*                            !! 1370                 seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
1529                  * Access to nr_free is lockl << 
1530                  * printing purposes. Use dat << 
1531                  */                           << 
1532                 seq_printf(m, "%6lu ", data_r << 
1533         seq_putc(m, '\n');                       1371         seq_putc(m, '\n');
1534 }                                                1372 }
1535                                                  1373 
1536 /*                                               1374 /*
1537  * This walks the free areas for each zone.      1375  * This walks the free areas for each zone.
1538  */                                              1376  */
1539 static int frag_show(struct seq_file *m, void    1377 static int frag_show(struct seq_file *m, void *arg)
1540 {                                                1378 {
1541         pg_data_t *pgdat = (pg_data_t *)arg;     1379         pg_data_t *pgdat = (pg_data_t *)arg;
1542         walk_zones_in_node(m, pgdat, true, fa    1380         walk_zones_in_node(m, pgdat, true, false, frag_show_print);
1543         return 0;                                1381         return 0;
1544 }                                                1382 }
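
frag_show() is the ->show hook of the fragmentation_op seq_operations defined further down, i.e. the backend of /proc/buddyinfo. Each row printed by frag_show_print() is one zone's free-page count per order, e.g. (illustrative values):

	Node 0, zone      DMA      1      1      1      0      2      1      1      0      1      1      3
	Node 0, zone   Normal   3498   2811   1400    622    201     88     31     10      4      2      1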
1545                                                  1383 
1546 static void pagetypeinfo_showfree_print(struc    1384 static void pagetypeinfo_showfree_print(struct seq_file *m,
1547                                         pg_da    1385                                         pg_data_t *pgdat, struct zone *zone)
1548 {                                                1386 {
1549         int order, mtype;                        1387         int order, mtype;
1550                                                  1388 
1551         for (mtype = 0; mtype < MIGRATE_TYPES    1389         for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
1552                 seq_printf(m, "Node %4d, zone    1390                 seq_printf(m, "Node %4d, zone %8s, type %12s ",
1553                                         pgdat    1391                                         pgdat->node_id,
1554                                         zone-    1392                                         zone->name,
1555                                         migra    1393                                         migratetype_names[mtype]);
1556                 for (order = 0; order < NR_PA !! 1394                 for (order = 0; order < MAX_ORDER; ++order) {
1557                         unsigned long freecou    1395                         unsigned long freecount = 0;
1558                         struct free_area *are    1396                         struct free_area *area;
1559                         struct list_head *cur    1397                         struct list_head *curr;
1560                         bool overflow = false    1398                         bool overflow = false;
1561                                                  1399 
1562                         area = &(zone->free_a    1400                         area = &(zone->free_area[order]);
1563                                                  1401 
1564                         list_for_each(curr, &    1402                         list_for_each(curr, &area->free_list[mtype]) {
1565                                 /*               1403                                 /*
1566                                  * Cap the fr    1404                                  * Cap the free_list iteration because it might
1567                                  * be really     1405                                  * be really large and we are under a spinlock
1568                                  * so a long     1406                                  * so a long time spent here could trigger a
1569                                  * hard locku    1407                                  * hard lockup detector. Anyway this is a
1570                                  * debugging     1408                                  * debugging tool so knowing there is a handful
1571                                  * of pages o    1409                                  * of pages of this order should be more than
1572                                  * sufficient    1410                                  * sufficient.
1573                                  */              1411                                  */
1574                                 if (++freecou    1412                                 if (++freecount >= 100000) {
1575                                         overf    1413                                         overflow = true;
1576                                         break    1414                                         break;
1577                                 }                1415                                 }
1578                         }                        1416                         }
1579                         seq_printf(m, "%s%6lu    1417                         seq_printf(m, "%s%6lu ", overflow ? ">" : "", freecount);
1580                         spin_unlock_irq(&zone    1418                         spin_unlock_irq(&zone->lock);
1581                         cond_resched();          1419                         cond_resched();
1582                         spin_lock_irq(&zone->    1420                         spin_lock_irq(&zone->lock);
1583                 }                                1421                 }
1584                 seq_putc(m, '\n');               1422                 seq_putc(m, '\n');
1585         }                                        1423         }
1586 }                                                1424 }
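
The unlock/resched/relock sequence inside the order loop above is deliberate: zone->lock is dropped between orders so that even the capped free-list walks cannot keep interrupts disabled for too long. The pattern in isolation (a sketch, not the kernel's exact code):

	spin_lock_irq(&zone->lock);
	for (order = 0; order < NR_PAGE_ORDERS; order++) {
		/* ... bounded walk of zone->free_area[order] ... */
		spin_unlock_irq(&zone->lock);	/* give IRQs and other CPUs a window */
		cond_resched();			/* yield if a reschedule is pending */
		spin_lock_irq(&zone->lock);
	}
	spin_unlock_irq(&zone->lock);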
1587                                                  1425 
1588 /* Print out the free pages at each order for    1426 /* Print out the free pages at each order for each migratetype */
1589 static void pagetypeinfo_showfree(struct seq_ !! 1427 static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
1590 {                                                1428 {
1591         int order;                               1429         int order;
1592         pg_data_t *pgdat = (pg_data_t *)arg;     1430         pg_data_t *pgdat = (pg_data_t *)arg;
1593                                                  1431 
1594         /* Print header */                       1432         /* Print header */
1595         seq_printf(m, "%-43s ", "Free pages c    1433         seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
1596         for (order = 0; order < NR_PAGE_ORDER !! 1434         for (order = 0; order < MAX_ORDER; ++order)
1597                 seq_printf(m, "%6d ", order);    1435                 seq_printf(m, "%6d ", order);
1598         seq_putc(m, '\n');                       1436         seq_putc(m, '\n');
1599                                                  1437 
1600         walk_zones_in_node(m, pgdat, true, fa    1438         walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);
                                                   >> 1439 
                                                   >> 1440         return 0;
1601 }                                                1441 }
1602                                                  1442 
1603 static void pagetypeinfo_showblockcount_print    1443 static void pagetypeinfo_showblockcount_print(struct seq_file *m,
1604                                         pg_da    1444                                         pg_data_t *pgdat, struct zone *zone)
1605 {                                                1445 {
1606         int mtype;                               1446         int mtype;
1607         unsigned long pfn;                       1447         unsigned long pfn;
1608         unsigned long start_pfn = zone->zone_    1448         unsigned long start_pfn = zone->zone_start_pfn;
1609         unsigned long end_pfn = zone_end_pfn(    1449         unsigned long end_pfn = zone_end_pfn(zone);
1610         unsigned long count[MIGRATE_TYPES] =     1450         unsigned long count[MIGRATE_TYPES] = { 0, };
1611                                                  1451 
1612         for (pfn = start_pfn; pfn < end_pfn;     1452         for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
1613                 struct page *page;               1453                 struct page *page;
1614                                                  1454 
1615                 page = pfn_to_online_page(pfn    1455                 page = pfn_to_online_page(pfn);
1616                 if (!page)                       1456                 if (!page)
1617                         continue;                1457                         continue;
1618                                                  1458 
                                                   >> 1459                 /* Watch for unexpected holes punched in the memmap */
                                                   >> 1460                 if (!memmap_valid_within(pfn, page, zone))
                                                   >> 1461                         continue;
                                                   >> 1462 
1619                 if (page_zone(page) != zone)     1463                 if (page_zone(page) != zone)
1620                         continue;                1464                         continue;
1621                                                  1465 
1622                 mtype = get_pageblock_migrate    1466                 mtype = get_pageblock_migratetype(page);
1623                                                  1467 
1624                 if (mtype < MIGRATE_TYPES)       1468                 if (mtype < MIGRATE_TYPES)
1625                         count[mtype]++;          1469                         count[mtype]++;
1626         }                                        1470         }
1627                                                  1471 
1628         /* Print counts */                       1472         /* Print counts */
1629         seq_printf(m, "Node %d, zone %8s ", p    1473         seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1630         for (mtype = 0; mtype < MIGRATE_TYPES    1474         for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1631                 seq_printf(m, "%12lu ", count    1475                 seq_printf(m, "%12lu ", count[mtype]);
1632         seq_putc(m, '\n');                       1476         seq_putc(m, '\n');
1633 }                                                1477 }
1634                                                  1478 
1635 /* Print out the number of pageblocks for eac    1479 /* Print out the number of pageblocks for each migratetype */
1636 static void pagetypeinfo_showblockcount(struc !! 1480 static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
1637 {                                                1481 {
1638         int mtype;                               1482         int mtype;
1639         pg_data_t *pgdat = (pg_data_t *)arg;     1483         pg_data_t *pgdat = (pg_data_t *)arg;
1640                                                  1484 
1641         seq_printf(m, "\n%-23s", "Number of b    1485         seq_printf(m, "\n%-23s", "Number of blocks type ");
1642         for (mtype = 0; mtype < MIGRATE_TYPES    1486         for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1643                 seq_printf(m, "%12s ", migrat    1487                 seq_printf(m, "%12s ", migratetype_names[mtype]);
1644         seq_putc(m, '\n');                       1488         seq_putc(m, '\n');
1645         walk_zones_in_node(m, pgdat, true, fa    1489         walk_zones_in_node(m, pgdat, true, false,
1646                 pagetypeinfo_showblockcount_p    1490                 pagetypeinfo_showblockcount_print);
                                                   >> 1491 
                                                   >> 1492         return 0;
1647 }                                                1493 }
1648                                                  1494 
1649 /*                                               1495 /*
1650  * Print out the number of pageblocks for eac    1496  * Print out the number of pageblocks for each migratetype that contain pages
1651  * of other types. This gives an indication o    1497  * of other types. This gives an indication of how well fallbacks are being
1652  * contained by rmqueue_fallback(). It requir    1498  * contained by rmqueue_fallback(). It requires information from PAGE_OWNER
1653  * to determine what is going on.              1499  * to determine what is going on.
1654  */                                              1500  */
1655 static void pagetypeinfo_showmixedcount(struc    1501 static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
1656 {                                                1502 {
1657 #ifdef CONFIG_PAGE_OWNER                         1503 #ifdef CONFIG_PAGE_OWNER
1658         int mtype;                               1504         int mtype;
1659                                                  1505 
1660         if (!static_branch_unlikely(&page_own    1506         if (!static_branch_unlikely(&page_owner_inited))
1661                 return;                          1507                 return;
1662                                                  1508 
1663         drain_all_pages(NULL);                   1509         drain_all_pages(NULL);
1664                                                  1510 
1665         seq_printf(m, "\n%-23s", "Number of m    1511         seq_printf(m, "\n%-23s", "Number of mixed blocks ");
1666         for (mtype = 0; mtype < MIGRATE_TYPES    1512         for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1667                 seq_printf(m, "%12s ", migrat    1513                 seq_printf(m, "%12s ", migratetype_names[mtype]);
1668         seq_putc(m, '\n');                       1514         seq_putc(m, '\n');
1669                                                  1515 
1670         walk_zones_in_node(m, pgdat, true, tr    1516         walk_zones_in_node(m, pgdat, true, true,
1671                 pagetypeinfo_showmixedcount_p    1517                 pagetypeinfo_showmixedcount_print);
1672 #endif /* CONFIG_PAGE_OWNER */                   1518 #endif /* CONFIG_PAGE_OWNER */
1673 }                                                1519 }
1674                                                  1520 
1675 /*                                               1521 /*
1676  * This prints out statistics in relation to     1522  * This prints out statistics in relation to grouping pages by mobility.
1677  * It is expensive to collect so do not const    1523  * It is expensive to collect so do not constantly read the file.
1678  */                                              1524  */
1679 static int pagetypeinfo_show(struct seq_file     1525 static int pagetypeinfo_show(struct seq_file *m, void *arg)
1680 {                                                1526 {
1681         pg_data_t *pgdat = (pg_data_t *)arg;     1527         pg_data_t *pgdat = (pg_data_t *)arg;
1682                                                  1528 
1683         /* check memoryless node */              1529         /* check memoryless node */
1684         if (!node_state(pgdat->node_id, N_MEM    1530         if (!node_state(pgdat->node_id, N_MEMORY))
1685                 return 0;                        1531                 return 0;
1686                                                  1532 
1687         seq_printf(m, "Page block order: %d\n    1533         seq_printf(m, "Page block order: %d\n", pageblock_order);
1688         seq_printf(m, "Pages per block:  %lu\    1534         seq_printf(m, "Pages per block:  %lu\n", pageblock_nr_pages);
1689         seq_putc(m, '\n');                       1535         seq_putc(m, '\n');
1690         pagetypeinfo_showfree(m, pgdat);         1536         pagetypeinfo_showfree(m, pgdat);
1691         pagetypeinfo_showblockcount(m, pgdat)    1537         pagetypeinfo_showblockcount(m, pgdat);
1692         pagetypeinfo_showmixedcount(m, pgdat)    1538         pagetypeinfo_showmixedcount(m, pgdat);
1693                                                  1539 
1694         return 0;                                1540         return 0;
1695 }                                                1541 }
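
Taken together, the three helpers give /proc/pagetypeinfo its familiar shape (abridged excerpt, illustrative values):

	Page block order: 9
	Pages per block:  512

	Free pages count per migrate type at order       0      1      2      3 ...
	Node    0, zone      DMA, type    Unmovable      1      1      1      0 ...
	Node    0, zone      DMA, type      Movable      0      0      0      0 ...

	Number of blocks type     Unmovable      Movable  Reclaimable ...
	Node 0, zone      DMA             1            7            0 ...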
1696                                                  1542 
1697 static const struct seq_operations fragmentat    1543 static const struct seq_operations fragmentation_op = {
1698         .start  = frag_start,                    1544         .start  = frag_start,
1699         .next   = frag_next,                     1545         .next   = frag_next,
1700         .stop   = frag_stop,                     1546         .stop   = frag_stop,
1701         .show   = frag_show,                     1547         .show   = frag_show,
1702 };                                               1548 };
1703                                                  1549 
1704 static const struct seq_operations pagetypein    1550 static const struct seq_operations pagetypeinfo_op = {
1705         .start  = frag_start,                    1551         .start  = frag_start,
1706         .next   = frag_next,                     1552         .next   = frag_next,
1707         .stop   = frag_stop,                     1553         .stop   = frag_stop,
1708         .show   = pagetypeinfo_show,             1554         .show   = pagetypeinfo_show,
1709 };                                               1555 };
1710                                                  1556 
1711 static bool is_zone_first_populated(pg_data_t    1557 static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
1712 {                                                1558 {
1713         int zid;                                 1559         int zid;
1714                                                  1560 
1715         for (zid = 0; zid < MAX_NR_ZONES; zid    1561         for (zid = 0; zid < MAX_NR_ZONES; zid++) {
1716                 struct zone *compare = &pgdat    1562                 struct zone *compare = &pgdat->node_zones[zid];
1717                                                  1563 
1718                 if (populated_zone(compare))     1564                 if (populated_zone(compare))
1719                         return zone == compar    1565                         return zone == compare;
1720         }                                        1566         }
1721                                                  1567 
1722         return false;                            1568         return false;
1723 }                                                1569 }
1724                                                  1570 
1725 static void zoneinfo_show_print(struct seq_fi    1571 static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1726                                                  1572                                                         struct zone *zone)
1727 {                                                1573 {
1728         int i;                                   1574         int i;
1729         seq_printf(m, "Node %d, zone %8s", pg    1575         seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
1730         if (is_zone_first_populated(pgdat, zo    1576         if (is_zone_first_populated(pgdat, zone)) {
1731                 seq_printf(m, "\n  per-node s    1577                 seq_printf(m, "\n  per-node stats");
1732                 for (i = 0; i < NR_VM_NODE_ST    1578                 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
1733                         unsigned long pages = << 
1734                                               << 
1735                         if (vmstat_item_print << 
1736                                 pages /= HPAG << 
1737                         seq_printf(m, "\n        1579                         seq_printf(m, "\n      %-12s %lu", node_stat_name(i),
1738                                    pages);    !! 1580                                    node_page_state(pgdat, i));
1739                 }                                1581                 }
1740         }                                        1582         }
1741         seq_printf(m,                            1583         seq_printf(m,
1742                    "\n  pages free     %lu"      1584                    "\n  pages free     %lu"
1743                    "\n        boost    %lu"   << 
1744                    "\n        min      %lu"      1585                    "\n        min      %lu"
1745                    "\n        low      %lu"      1586                    "\n        low      %lu"
1746                    "\n        high     %lu"      1587                    "\n        high     %lu"
1747                    "\n        promo    %lu"   << 
1748                    "\n        spanned  %lu"      1588                    "\n        spanned  %lu"
1749                    "\n        present  %lu"      1589                    "\n        present  %lu"
1750                    "\n        managed  %lu"   !! 1590                    "\n        managed  %lu",
1751                    "\n        cma      %lu",  << 
1752                    zone_page_state(zone, NR_F    1591                    zone_page_state(zone, NR_FREE_PAGES),
1753                    zone->watermark_boost,     << 
1754                    min_wmark_pages(zone),        1592                    min_wmark_pages(zone),
1755                    low_wmark_pages(zone),        1593                    low_wmark_pages(zone),
1756                    high_wmark_pages(zone),       1594                    high_wmark_pages(zone),
1757                    promo_wmark_pages(zone),   << 
1758                    zone->spanned_pages,          1595                    zone->spanned_pages,
1759                    zone->present_pages,          1596                    zone->present_pages,
1760                    zone_managed_pages(zone),  !! 1597                    zone_managed_pages(zone));
1761                    zone_cma_pages(zone));     << 
1762                                                  1598 
1763         seq_printf(m,                            1599         seq_printf(m,
1764                    "\n        protection: (%l    1600                    "\n        protection: (%ld",
1765                    zone->lowmem_reserve[0]);     1601                    zone->lowmem_reserve[0]);
1766         for (i = 1; i < ARRAY_SIZE(zone->lowm    1602         for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
1767                 seq_printf(m, ", %ld", zone->    1603                 seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
1768         seq_putc(m, ')');                        1604         seq_putc(m, ')');
1769                                                  1605 
1770         /* If unpopulated, no other informati    1606         /* If unpopulated, no other information is useful */
1771         if (!populated_zone(zone)) {             1607         if (!populated_zone(zone)) {
1772                 seq_putc(m, '\n');               1608                 seq_putc(m, '\n');
1773                 return;                          1609                 return;
1774         }                                        1610         }
1775                                                  1611 
1776         for (i = 0; i < NR_VM_ZONE_STAT_ITEMS    1612         for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
1777                 seq_printf(m, "\n      %-12s     1613                 seq_printf(m, "\n      %-12s %lu", zone_stat_name(i),
1778                            zone_page_state(zo    1614                            zone_page_state(zone, i));
1779                                                  1615 
1780 #ifdef CONFIG_NUMA                               1616 #ifdef CONFIG_NUMA
1781         for (i = 0; i < NR_VM_NUMA_EVENT_ITEM !! 1617         for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
1782                 seq_printf(m, "\n      %-12s     1618                 seq_printf(m, "\n      %-12s %lu", numa_stat_name(i),
1783                            zone_numa_event_st !! 1619                            zone_numa_state_snapshot(zone, i));
1784 #endif                                           1620 #endif
1785                                                  1621 
1786         seq_printf(m, "\n  pagesets");           1622         seq_printf(m, "\n  pagesets");
1787         for_each_online_cpu(i) {                 1623         for_each_online_cpu(i) {
1788                 struct per_cpu_pages *pcp;    !! 1624                 struct per_cpu_pageset *pageset;
1789                 struct per_cpu_zonestat __may << 
1790                                                  1625 
1791                 pcp = per_cpu_ptr(zone->per_c !! 1626                 pageset = per_cpu_ptr(zone->pageset, i);
1792                 seq_printf(m,                    1627                 seq_printf(m,
1793                            "\n    cpu: %i"       1628                            "\n    cpu: %i"
1794                            "\n              c    1629                            "\n              count: %i"
1795                            "\n              h    1630                            "\n              high:  %i"
1796                            "\n              b    1631                            "\n              batch: %i",
1797                            i,                    1632                            i,
1798                            pcp->count,        !! 1633                            pageset->pcp.count,
1799                            pcp->high,         !! 1634                            pageset->pcp.high,
1800                            pcp->batch);       !! 1635                            pageset->pcp.batch);
1801 #ifdef CONFIG_SMP                                1636 #ifdef CONFIG_SMP
1802                 pzstats = per_cpu_ptr(zone->p << 
1803                 seq_printf(m, "\n  vm stats t    1637                 seq_printf(m, "\n  vm stats threshold: %d",
1804                                 pzstats->stat !! 1638                                 pageset->stat_threshold);
1805 #endif                                           1639 #endif
1806         }                                        1640         }
1807         seq_printf(m,                            1641         seq_printf(m,
1808                    "\n  node_unreclaimable:      1642                    "\n  node_unreclaimable:  %u"
1809                    "\n  start_pfn:               1643                    "\n  start_pfn:           %lu",
1810                    pgdat->kswapd_failures >=     1644                    pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES,
1811                    zone->zone_start_pfn);        1645                    zone->zone_start_pfn);
1812         seq_putc(m, '\n');                       1646         seq_putc(m, '\n');
1813 }                                                1647 }
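
The resulting per-zone block in /proc/zoneinfo looks like this (abridged, illustrative values):

	Node 0, zone   Normal
	  per-node stats
	      nr_inactive_anon 168463
	      ...
	  pages free     811596
	        boost    0
	        min      12486
	        low      15607
	        high     18728
	        ...
	  pagesets
	    cpu: 0
	              count: 42
	              high:  186
	              batch: 31
	  vm stats threshold: 24
	  node_unreclaimable:  0
	  start_pfn:           1048576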
1814                                                  1648 
1815 /*                                               1649 /*
1816  * Output information about zones in @pgdat.     1650  * Output information about zones in @pgdat.  All zones are printed regardless
1817  * of whether they are populated or not: lowm    1651  * of whether they are populated or not: lowmem_reserve_ratio operates on the
1818  * set of all zones and userspace would not b    1652  * set of all zones and userspace would not be aware of such zones if they are
1819  * suppressed here (zoneinfo displays the eff    1653  * suppressed here (zoneinfo displays the effect of lowmem_reserve_ratio).
1820  */                                              1654  */
1821 static int zoneinfo_show(struct seq_file *m,     1655 static int zoneinfo_show(struct seq_file *m, void *arg)
1822 {                                                1656 {
1823         pg_data_t *pgdat = (pg_data_t *)arg;     1657         pg_data_t *pgdat = (pg_data_t *)arg;
1824         walk_zones_in_node(m, pgdat, false, f    1658         walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print);
1825         return 0;                                1659         return 0;
1826 }                                                1660 }
1827                                                  1661 
1828 static const struct seq_operations zoneinfo_o    1662 static const struct seq_operations zoneinfo_op = {
1829         .start  = frag_start, /* iterate over    1663         .start  = frag_start, /* iterate over all zones. The same as in
1830                                * fragmentatio    1664                                * fragmentation. */
1831         .next   = frag_next,                     1665         .next   = frag_next,
1832         .stop   = frag_stop,                     1666         .stop   = frag_stop,
1833         .show   = zoneinfo_show,                 1667         .show   = zoneinfo_show,
1834 };                                               1668 };
1835                                                  1669 
1836 #define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEM    1670 #define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
1837                          NR_VM_NUMA_EVENT_ITE !! 1671                          NR_VM_NUMA_STAT_ITEMS + \
1838                          NR_VM_NODE_STAT_ITEM    1672                          NR_VM_NODE_STAT_ITEMS + \
1839                          NR_VM_STAT_ITEMS + \ !! 1673                          NR_VM_WRITEBACK_STAT_ITEMS + \
1840                          (IS_ENABLED(CONFIG_V    1674                          (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
1841                           NR_VM_EVENT_ITEMS : 0))    1675                           NR_VM_EVENT_ITEMS : 0))
1842                                                  1676 
1843 static void *vmstat_start(struct seq_file *m, loff_t *pos)    1677 static void *vmstat_start(struct seq_file *m, loff_t *pos)
1844 {                                                1678 {
1845         unsigned long *v;                        1679         unsigned long *v;
1846         int i;                                   1680         int i;
1847                                                  1681 
1848         if (*pos >= NR_VMSTAT_ITEMS)             1682         if (*pos >= NR_VMSTAT_ITEMS)
1849                 return NULL;                     1683                 return NULL;
1850                                                  1684 
1851         BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS);    1685         BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS);
1852         fold_vm_numa_events();                <<
1853         v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL);    1686         v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL);
1854         m->private = v;                          1687         m->private = v;
1855         if (!v)                                  1688         if (!v)
1856                 return ERR_PTR(-ENOMEM);         1689                 return ERR_PTR(-ENOMEM);
1857         for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)    1690         for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
1858                 v[i] = global_zone_page_state(i);    1691                 v[i] = global_zone_page_state(i);
1859         v += NR_VM_ZONE_STAT_ITEMS;              1692         v += NR_VM_ZONE_STAT_ITEMS;
1860                                                  1693 
1861 #ifdef CONFIG_NUMA                               1694 #ifdef CONFIG_NUMA
1862         for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)    !! 1695         for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
1863                 v[i] = global_numa_event_state(i);    !! 1696                 v[i] = global_numa_state(i);
1864         v += NR_VM_NUMA_EVENT_ITEMS;          !! 1697         v += NR_VM_NUMA_STAT_ITEMS;
1865 #endif                                           1698 #endif
1866                                                  1699 
1867         for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {    !! 1700         for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
1868                 v[i] = global_node_page_state_pages(i);    !! 1701                 v[i] = global_node_page_state(i);
1869                 if (vmstat_item_print_in_thp(i))    <<
1870                         v[i] /= HPAGE_PMD_NR; <<
1871         }                                     <<
1872         v += NR_VM_NODE_STAT_ITEMS;              1702         v += NR_VM_NODE_STAT_ITEMS;
1873                                                  1703 
1874         global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,    1704         global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
1875                             v + NR_DIRTY_THRESHOLD);    1705                             v + NR_DIRTY_THRESHOLD);
1876         v[NR_MEMMAP_PAGES] = atomic_long_read(&nr_memmap_pages);    !! 1706         v += NR_VM_WRITEBACK_STAT_ITEMS;
1877         v[NR_MEMMAP_BOOT_PAGES] = atomic_long_read(&nr_memmap_boot_pages);    <<
1878         v += NR_VM_STAT_ITEMS;                <<
1879                                                  1707 
1880 #ifdef CONFIG_VM_EVENT_COUNTERS                  1708 #ifdef CONFIG_VM_EVENT_COUNTERS
1881         all_vm_events(v);                        1709         all_vm_events(v);
1882         v[PGPGIN] /= 2;         /* sectors -> kbytes */    1710         v[PGPGIN] /= 2;         /* sectors -> kbytes */
1883         v[PGPGOUT] /= 2;                         1711         v[PGPGOUT] /= 2;
1884 #endif                                           1712 #endif
1885         return (unsigned long *)m->private + *pos;    1713         return (unsigned long *)m->private + *pos;
1886 }                                                1714 }
1887                                                  1715 
1888 static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)    1716 static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
1889 {                                                1717 {
1890         (*pos)++;                                1718         (*pos)++;
1891         if (*pos >= NR_VMSTAT_ITEMS)             1719         if (*pos >= NR_VMSTAT_ITEMS)
1892                 return NULL;                     1720                 return NULL;
1893         return (unsigned long *)m->private + *pos;    1721         return (unsigned long *)m->private + *pos;
1894 }                                                1722 }
1895                                                  1723 
1896 static int vmstat_show(struct seq_file *m, void *arg)    1724 static int vmstat_show(struct seq_file *m, void *arg)
1897 {                                                1725 {
1898         unsigned long *l = arg;                  1726         unsigned long *l = arg;
1899         unsigned long off = l - (unsigned long *)m->private;    1727         unsigned long off = l - (unsigned long *)m->private;
1900                                                  1728 
1901         seq_puts(m, vmstat_text[off]);           1729         seq_puts(m, vmstat_text[off]);
1902         seq_put_decimal_ull(m, " ", *l);         1730         seq_put_decimal_ull(m, " ", *l);
1903         seq_putc(m, '\n');                       1731         seq_putc(m, '\n');
1904                                                  1732 
1905         if (off == NR_VMSTAT_ITEMS - 1) {        1733         if (off == NR_VMSTAT_ITEMS - 1) {
1906                 /*                               1734                 /*
1907                  * We've come to the end - add any deprecated counters to avoid    1735                  * We've come to the end - add any deprecated counters to avoid
1908                  * breaking userspace which might depend on them being present.    1736                  * breaking userspace which might depend on them being present.
1909                  */                              1737                  */
1910                 seq_puts(m, "nr_unstable 0\n");    1738                 seq_puts(m, "nr_unstable 0\n");
1911         }                                        1739         }
1912         return 0;                                1740         return 0;
1913 }                                                1741 }
1914                                                  1742 
1915 static void vmstat_stop(struct seq_file *m, void *arg)    1743 static void vmstat_stop(struct seq_file *m, void *arg)
1916 {                                                1744 {
1917         kfree(m->private);                       1745         kfree(m->private);
1918         m->private = NULL;                       1746         m->private = NULL;
1919 }                                                1747 }
1920                                                  1748 
1921 static const struct seq_operations vmstat_op = {    1749 static const struct seq_operations vmstat_op = {
1922         .start  = vmstat_start,                  1750         .start  = vmstat_start,
1923         .next   = vmstat_next,                   1751         .next   = vmstat_next,
1924         .stop   = vmstat_stop,                   1752         .stop   = vmstat_stop,
1925         .show   = vmstat_show,                   1753         .show   = vmstat_show,
1926 };                                               1754 };
1927 #endif /* CONFIG_PROC_FS */                      1755 #endif /* CONFIG_PROC_FS */
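
/*
 * Editor's illustrative sketch (not part of vmstat.c): a minimal userspace
 * reader for the "name value" lines that vmstat_show() above emits through
 * seq_file. Assumes a Linux system with /proc mounted; the counter name
 * "nr_free_pages" is just one example entry, and error handling is minimal.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char name[128];
	unsigned long long value;
	FILE *f = fopen("/proc/vmstat", "r");

	if (!f)
		return 1;
	/* Each line is "<counter-name> <decimal-value>\n". */
	while (fscanf(f, "%127s %llu", name, &value) == 2) {
		if (!strcmp(name, "nr_free_pages"))
			printf("free pages: %llu\n", value);
	}
	fclose(f);
	return 0;
}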
1928                                                  1756 
1929 #ifdef CONFIG_SMP                                1757 #ifdef CONFIG_SMP
1930 static DEFINE_PER_CPU(struct delayed_work, vmstat_work);    1758 static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
1931 int sysctl_stat_interval __read_mostly = HZ;     1759 int sysctl_stat_interval __read_mostly = HZ;
1932                                                  1760 
1933 #ifdef CONFIG_PROC_FS                            1761 #ifdef CONFIG_PROC_FS
1934 static void refresh_vm_stats(struct work_struct *work)    1762 static void refresh_vm_stats(struct work_struct *work)
1935 {                                                1763 {
1936         refresh_cpu_vm_stats(true);              1764         refresh_cpu_vm_stats(true);
1937 }                                                1765 }
1938                                                  1766 
1939 int vmstat_refresh(const struct ctl_table *table, int write,    !! 1767 int vmstat_refresh(struct ctl_table *table, int write,
1940                    void *buffer, size_t *lenp, loff_t *ppos)    1768                    void *buffer, size_t *lenp, loff_t *ppos)
1941 {                                                1769 {
1942         long val;                                1770         long val;
1943         int err;                                 1771         int err;
1944         int i;                                   1772         int i;
1945                                                  1773 
1946         /*                                       1774         /*
1947          * The regular update, every sysctl_stat_interval, may come later    1775          * The regular update, every sysctl_stat_interval, may come later
1948          * than expected: leaving a significant amount in per_cpu buckets.    1776          * than expected: leaving a significant amount in per_cpu buckets.
1949          * This is particularly misleading when checking a quantity of HUGE    1777          * This is particularly misleading when checking a quantity of HUGE
1950          * pages, immediately after running a test.  /proc/sys/vm/stat_refresh,    1778          * pages, immediately after running a test.  /proc/sys/vm/stat_refresh,
1951          * which can equally be echo'ed to or cat'ted from (by root),    1779          * which can equally be echo'ed to or cat'ted from (by root),
1952          * can be used to update the stats just before reading them.    1780          * can be used to update the stats just before reading them.
1953          *                                       1781          *
1954          * Oh, and since global_zone_page_state() etc. are so careful to hide    1782          * Oh, and since global_zone_page_state() etc. are so careful to hide
1955          * transiently negative values, report an error here if any of    1783          * transiently negative values, report an error here if any of
1956          * the stats is negative, so we know to go looking for imbalance.    1784          * the stats is negative, so we know to go looking for imbalance.
1957          */                                      1785          */
1958         err = schedule_on_each_cpu(refresh_vm_stats);    1786         err = schedule_on_each_cpu(refresh_vm_stats);
1959         if (err)                                 1787         if (err)
1960                 return err;                      1788                 return err;
1961         for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {    1789         for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
1962                 /*                            <<
1963                  * Skip checking stats known to go negative occasionally.    <<
1964                  */                           <<
1965                 switch (i) {                  <<
1966                 case NR_ZONE_WRITE_PENDING:   <<
1967                 case NR_FREE_CMA_PAGES:       <<
1968                         continue;             <<
1969                 }                             <<
1970                 val = atomic_long_read(&vm_zone_stat[i]);    1790                 val = atomic_long_read(&vm_zone_stat[i]);
1971                 if (val < 0) {                   1791                 if (val < 0) {
1972                         pr_warn("%s: %s %ld\n",    1792                         pr_warn("%s: %s %ld\n",
1973                                 __func__, zone_stat_name(i), val);    1793                                 __func__, zone_stat_name(i), val);
                                                   >> 1794                         err = -EINVAL;
1974                 }                                1795                 }
1975         }                                        1796         }
1976         for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {    !! 1797 #ifdef CONFIG_NUMA
1977                 /*                            !! 1798         for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
1978                  * Skip checking stats known to go negative occasionally.    !! 1799                 val = atomic_long_read(&vm_numa_stat[i]);
1979                  */                           <<
1980                 switch (i) {                  <<
1981                 case NR_WRITEBACK:            <<
1982                         continue;             <<
1983                 }                             <<
1984                 val = atomic_long_read(&vm_node_stat[i]);    <<
1985                 if (val < 0) {                   1800                 if (val < 0) {
1986                         pr_warn("%s: %s %ld\n",    1801                         pr_warn("%s: %s %ld\n",
1987                                 __func__, node_stat_name(i), val);    !! 1802                                 __func__, numa_stat_name(i), val);
                                                   >> 1803                         err = -EINVAL;
1988                 }                                1804                 }
1989         }                                        1805         }
                                                   >> 1806 #endif
                                                   >> 1807         if (err)
                                                   >> 1808                 return err;
1990         if (write)                               1809         if (write)
1991                 *ppos += *lenp;                  1810                 *ppos += *lenp;
1992         else                                     1811         else
1993                 *lenp = 0;                       1812                 *lenp = 0;
1994         return 0;                                1813         return 0;
1995 }                                                1814 }
1996 #endif /* CONFIG_PROC_FS */                      1815 #endif /* CONFIG_PROC_FS */
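
/*
 * Editor's illustrative sketch (not part of vmstat.c): forcing a stats
 * flush from userspace before reading /proc/vmstat, as the comment in
 * vmstat_refresh() describes. Requires root. The handler above never
 * parses the written buffer, so any write acts purely as a trigger.
 */
#include <fcntl.h>
#include <unistd.h>

static int refresh_vmstat(void)
{
	int fd = open("/proc/sys/vm/stat_refresh", O_WRONLY);

	if (fd < 0)
		return -1;
	/* The written contents are ignored; the write itself is the trigger. */
	if (write(fd, "1", 1) < 0) {
		close(fd);
		return -1;
	}
	return close(fd);
}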
1997                                                  1816 
1998 static void vmstat_update(struct work_struct *w)    1817 static void vmstat_update(struct work_struct *w)
1999 {                                                1818 {
2000         if (refresh_cpu_vm_stats(true)) {        1819         if (refresh_cpu_vm_stats(true)) {
2001                 /*                               1820                 /*
2002                  * Counters were updated so we expect more updates    1821                  * Counters were updated so we expect more updates
2003                  * to occur in the future. Keep on running the    1822                  * to occur in the future. Keep on running the
2004                  * update worker thread.         1823                  * update worker thread.
2005                  */                              1824                  */
2006                 queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,    1825                 queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
2007                                 this_cpu_ptr(&vmstat_work),    1826                                 this_cpu_ptr(&vmstat_work),
2008                                 round_jiffies_relative(sysctl_stat_interval));    1827                                 round_jiffies_relative(sysctl_stat_interval));
2009         }                                        1828         }
2010 }                                                1829 }
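
/*
 * Editor's illustrative sketch (not part of vmstat.c): the self-rearming
 * delayed-work pattern used by vmstat_update() above, reduced to a toy
 * kernel module. "demo_work", "demo_fn", "demo_init" and "demo_exit" are
 * made-up names; a sketch, not a definitive implementation.
 */
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/timer.h>	/* round_jiffies_relative() */

static struct delayed_work demo_work;

static void demo_fn(struct work_struct *w)
{
	pr_info("demo tick\n");
	/* Re-arm ourselves, as vmstat_update() does while diffs keep appearing. */
	schedule_delayed_work(&demo_work, round_jiffies_relative(HZ));
}

static int __init demo_init(void)
{
	INIT_DELAYED_WORK(&demo_work, demo_fn);
	schedule_delayed_work(&demo_work, HZ);
	return 0;
}

static void __exit demo_exit(void)
{
	/* Wait for a running instance and prevent re-queueing. */
	cancel_delayed_work_sync(&demo_work);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");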
2011                                                  1830 
2012 /*                                               1831 /*
                                                   >> 1832  * Switch off vmstat processing and then fold all the remaining differentials
                                                   >> 1833  * until the diffs stay at zero. The function is used by NOHZ and can only be
                                                   >> 1834  * invoked when tick processing is not active.
                                                   >> 1835  */
                                                   >> 1836 /*
2013  * Check if the diffs for a certain cpu indicate that    1837  * Check if the diffs for a certain cpu indicate that
2014  * an update is needed.                          1838  * an update is needed.
2015  */                                              1839  */
2016 static bool need_update(int cpu)                 1840 static bool need_update(int cpu)
2017 {                                                1841 {
2018         pg_data_t *last_pgdat = NULL;         << 
2019         struct zone *zone;                       1842         struct zone *zone;
2020                                                  1843 
2021         for_each_populated_zone(zone) {          1844         for_each_populated_zone(zone) {
2022                 struct per_cpu_zonestat *pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);    !! 1845                 struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
2023                 struct per_cpu_nodestat *n;   !! 1846 
                                                   >> 1847                 BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
                                                   >> 1848 #ifdef CONFIG_NUMA
                                                   >> 1849                 BUILD_BUG_ON(sizeof(p->vm_numa_stat_diff[0]) != 2);
                                                   >> 1850 #endif
2024                                                  1851 
2025                 /*                               1852                 /*
2026                  * The fast way of checking if there are any vmstat diffs.    1853                  * The fast way of checking if there are any vmstat diffs.
2027                  */                              1854                  */
2028                 if (memchr_inv(pzstats->vm_stat_diff, 0, sizeof(pzstats->vm_stat_diff)))    !! 1855                 if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS *
                                                   >> 1856                sizeof(p->vm_stat_diff[0])))
2029                         return true;             1857                         return true;
2030                                               !! 1858 #ifdef CONFIG_NUMA
2031                 if (last_pgdat == zone->zone_pgdat)    !! 1859                 if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS *
2032                         continue;             !! 1860                sizeof(p->vm_numa_stat_diff[0])))
2033                 last_pgdat = zone->zone_pgdat;    <<
2034                 n = per_cpu_ptr(zone->zone_pgdat->per_cpu_nodestats, cpu);    <<
2035                 if (memchr_inv(n->vm_node_stat_diff, 0, sizeof(n->vm_node_stat_diff)))    <<
2036                         return true;             1861                         return true;
                                                   >> 1862 #endif
2037         }                                        1863         }
2038         return false;                            1864         return false;
2039 }                                                1865 }
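
/*
 * Editor's illustrative sketch (not part of vmstat.c): what the
 * memchr_inv() fast path in need_update() relies on. memchr_inv(buf, c,
 * len) returns the address of the first byte that differs from c, or NULL
 * if every byte matches, so scanning the whole diff array as raw bytes is
 * cheaper than a per-counter loop. A portable, unoptimized equivalent:
 */
#include <stddef.h>

static const void *memchr_inv_equiv(const void *start, int c, size_t bytes)
{
	const unsigned char *p = start;

	while (bytes--) {
		if (*p != (unsigned char)c)
			return p;	/* first mismatching byte */
		p++;
	}
	return NULL;			/* every byte equals c: no pending diffs */
}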
2040                                                  1866 
2041 /*                                               1867 /*
2042  * Switch off vmstat processing and then fold all the remaining differentials    1868  * Switch off vmstat processing and then fold all the remaining differentials
2043  * until the diffs stay at zero. The function is used by NOHZ and can only be    1869  * until the diffs stay at zero. The function is used by NOHZ and can only be
2044  * invoked when tick processing is not active.    1870  * invoked when tick processing is not active.
2045  */                                              1871  */
2046 void quiet_vmstat(void)                          1872 void quiet_vmstat(void)
2047 {                                                1873 {
2048         if (system_state != SYSTEM_RUNNING)      1874         if (system_state != SYSTEM_RUNNING)
2049                 return;                          1875                 return;
2050                                                  1876 
2051         if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))    1877         if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))
2052                 return;                          1878                 return;
2053                                                  1879 
2054         if (!need_update(smp_processor_id()))    1880         if (!need_update(smp_processor_id()))
2055                 return;                          1881                 return;
2056                                                  1882 
2057         /*                                       1883         /*
2058          * Just refresh counters and do not care about the pending delayed    1884          * Just refresh counters and do not care about the pending delayed
2059          * vmstat_update. It doesn't fire that often to matter and canceling    1885          * vmstat_update. It doesn't fire that often to matter and canceling
2060          * it would be too expensive from this path.    1886          * it would be too expensive from this path.
2061          * vmstat_shepherd will take care about that for us.    1887          * vmstat_shepherd will take care about that for us.
2062          */                                      1888          */
2063         refresh_cpu_vm_stats(false);             1889         refresh_cpu_vm_stats(false);
2064 }                                                1890 }
2065                                                  1891 
2066 /*                                               1892 /*
2067  * Shepherd worker thread that checks the        1893  * Shepherd worker thread that checks the
2068  * differentials of processors that have their worker    1894  * differentials of processors that have their worker
2069  * threads for vm statistics updates disabled because of    1895  * threads for vm statistics updates disabled because of
2070  * inactivity.                                   1896  * inactivity.
2071  */                                              1897  */
2072 static void vmstat_shepherd(struct work_struct *w);    1898 static void vmstat_shepherd(struct work_struct *w);
2073                                                  1899 
2074 static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);    1900 static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);
2075                                                  1901 
2076 static void vmstat_shepherd(struct work_struct *w)    1902 static void vmstat_shepherd(struct work_struct *w)
2077 {                                                1903 {
2078         int cpu;                                 1904         int cpu;
2079                                                  1905 
2080         cpus_read_lock();                     !! 1906         get_online_cpus();
2081         /* Check processors whose vmstat worker threads have been disabled */    1907         /* Check processors whose vmstat worker threads have been disabled */
2082         for_each_online_cpu(cpu) {               1908         for_each_online_cpu(cpu) {
2083                 struct delayed_work *dw = &per_cpu(vmstat_work, cpu);    1909                 struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
2084                                                  1910 
2085                 /*                            <<
2086                  * In kernel users of vmstat counters either require the precise value and    <<
2087                  * they are using zone_page_state_snapshot interface or they can live with    <<
2088                  * an imprecision as the regular flushing can happen at arbitrary time and    <<
2089                  * cumulative error can grow (see calculate_normal_threshold).    <<
2090                  *                            <<
2091                  * From that POV the regular flushing can be postponed for CPUs that have    <<
2092                  * been isolated from the kernel interference without critical    <<
2093                  * infrastructure ever noticing. Skip regular flushing from vmstat_shepherd    <<
2094                  * for all isolated CPUs to avoid interference with the isolated workload.    <<
2095                  */                           <<
2096                 if (cpu_is_isolated(cpu))     <<
2097                         continue;             <<
2098                                               <<
2099                 if (!delayed_work_pending(dw) && need_update(cpu))    1911                 if (!delayed_work_pending(dw) && need_update(cpu))
2100                         queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);    1912                         queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
2101                                               << 
2102                 cond_resched();               << 
2103         }                                        1913         }
2104         cpus_read_unlock();                   !! 1914         put_online_cpus();
2105                                                  1915 
2106         schedule_delayed_work(&shepherd,         1916         schedule_delayed_work(&shepherd,
2107                 round_jiffies_relative(sysctl_stat_interval));    1917                 round_jiffies_relative(sysctl_stat_interval));
2108 }                                                1918 }
2109                                                  1919 
2110 static void __init start_shepherd_timer(void)    1920 static void __init start_shepherd_timer(void)
2111 {                                                1921 {
2112         int cpu;                                 1922         int cpu;
2113                                                  1923 
2114         for_each_possible_cpu(cpu)               1924         for_each_possible_cpu(cpu)
2115                 INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),    1925                 INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
2116                         vmstat_update);          1926                         vmstat_update);
2117                                                  1927 
2118         schedule_delayed_work(&shepherd,         1928         schedule_delayed_work(&shepherd,
2119                 round_jiffies_relative(sysctl_stat_interval));    1929                 round_jiffies_relative(sysctl_stat_interval));
2120 }                                                1930 }
2121                                                  1931 
2122 static void __init init_cpu_node_state(void)     1932 static void __init init_cpu_node_state(void)
2123 {                                                1933 {
2124         int node;                                1934         int node;
2125                                                  1935 
2126         for_each_online_node(node) {             1936         for_each_online_node(node) {
2127                 if (!cpumask_empty(cpumask_of_node(node)))    !! 1937                 if (cpumask_weight(cpumask_of_node(node)) > 0)
2128                         node_set_state(node,     1938                         node_set_state(node, N_CPU);
2129         }                                        1939         }
2130 }                                                1940 }
2131                                                  1941 
2132 static int vmstat_cpu_online(unsigned int cpu)    1942 static int vmstat_cpu_online(unsigned int cpu)
2133 {                                                1943 {
2134         refresh_zone_stat_thresholds();          1944         refresh_zone_stat_thresholds();
2135                                               !! 1945         node_set_state(cpu_to_node(cpu), N_CPU);
2136         if (!node_state(cpu_to_node(cpu), N_CPU)) {    <<
2137                 node_set_state(cpu_to_node(cpu), N_CPU);    <<
2138         }                                     <<
2139                                               << 
2140         return 0;                                1946         return 0;
2141 }                                                1947 }
2142                                                  1948 
2143 static int vmstat_cpu_down_prep(unsigned int cpu)    1949 static int vmstat_cpu_down_prep(unsigned int cpu)
2144 {                                                1950 {
2145         cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));    1951         cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
2146         return 0;                                1952         return 0;
2147 }                                                1953 }
2148                                                  1954 
2149 static int vmstat_cpu_dead(unsigned int cpu)     1955 static int vmstat_cpu_dead(unsigned int cpu)
2150 {                                                1956 {
2151         const struct cpumask *node_cpus;         1957         const struct cpumask *node_cpus;
2152         int node;                                1958         int node;
2153                                                  1959 
2154         node = cpu_to_node(cpu);                 1960         node = cpu_to_node(cpu);
2155                                                  1961 
2156         refresh_zone_stat_thresholds();          1962         refresh_zone_stat_thresholds();
2157         node_cpus = cpumask_of_node(node);       1963         node_cpus = cpumask_of_node(node);
2158         if (!cpumask_empty(node_cpus))        !! 1964         if (cpumask_weight(node_cpus) > 0)
2159                 return 0;                        1965                 return 0;
2160                                                  1966 
2161         node_clear_state(node, N_CPU);           1967         node_clear_state(node, N_CPU);
2162                                               << 
2163         return 0;                                1968         return 0;
2164 }                                                1969 }
2165                                                  1970 
2166 #endif                                           1971 #endif
2167                                                  1972 
2168 struct workqueue_struct *mm_percpu_wq;           1973 struct workqueue_struct *mm_percpu_wq;
2169                                                  1974 
2170 void __init init_mm_internals(void)              1975 void __init init_mm_internals(void)
2171 {                                                1976 {
2172         int ret __maybe_unused;                  1977         int ret __maybe_unused;
2173                                                  1978 
2174         mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);    1979         mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);
2175                                                  1980 
2176 #ifdef CONFIG_SMP                                1981 #ifdef CONFIG_SMP
2177         ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",    1982         ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
2178                                         NULL, vmstat_cpu_dead);    1983                                         NULL, vmstat_cpu_dead);
2179         if (ret < 0)                             1984         if (ret < 0)
2180                 pr_err("vmstat: failed to register 'dead' hotplug state\n");    1985                 pr_err("vmstat: failed to register 'dead' hotplug state\n");
2181                                                  1986 
2182         ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online",    1987         ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online",
2183                                         vmstat_cpu_online,    1988                                         vmstat_cpu_online,
2184                                         vmstat_cpu_down_prep);    1989                                         vmstat_cpu_down_prep);
2185         if (ret < 0)                             1990         if (ret < 0)
2186                 pr_err("vmstat: failed to register 'online' hotplug state\n");    1991                 pr_err("vmstat: failed to register 'online' hotplug state\n");
2187                                                  1992 
2188         cpus_read_lock();                     !! 1993         get_online_cpus();
2189         init_cpu_node_state();                   1994         init_cpu_node_state();
2190         cpus_read_unlock();                   !! 1995         put_online_cpus();
2191                                                  1996 
2192         start_shepherd_timer();                  1997         start_shepherd_timer();
2193 #endif                                           1998 #endif
2194 #ifdef CONFIG_PROC_FS                            1999 #ifdef CONFIG_PROC_FS
2195         proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);    2000         proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
2196         proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);    2001         proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);
2197         proc_create_seq("vmstat", 0444, NULL, &vmstat_op);    2002         proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
2198         proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);    2003         proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
2199 #endif                                           2004 #endif
2200 }                                                2005 }
2201                                                  2006 
2202 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)    2007 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
2203                                                  2008 
2204 /*                                               2009 /*
2205  * Return an index indicating how much of the available free memory is    2010  * Return an index indicating how much of the available free memory is
2206  * unusable for an allocation of the requested size.    2011  * unusable for an allocation of the requested size.
2207  */                                              2012  */
2208 static int unusable_free_index(unsigned int order,    2013 static int unusable_free_index(unsigned int order,
2209                                 struct contig_page_info *info)    2014                                 struct contig_page_info *info)
2210 {                                                2015 {
2211         /* No free memory is interpreted as all free memory is unusable */    2016         /* No free memory is interpreted as all free memory is unusable */
2212         if (info->free_pages == 0)               2017         if (info->free_pages == 0)
2213                 return 1000;                     2018                 return 1000;
2214                                                  2019 
2215         /*                                       2020         /*
2216          * Index should be a value between 0 and 1. Return a value to 3    2021          * Index should be a value between 0 and 1. Return a value to 3
2217          * decimal places.                       2022          * decimal places.
2218          *                                       2023          *
2219          * 0 => no fragmentation                 2024          * 0 => no fragmentation
2220          * 1 => high fragmentation               2025          * 1 => high fragmentation
2221          */                                      2026          */
2222         return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);    2027         return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);
2223                                                  2028 
2224 }                                                2029 }
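
/*
 * Editor's illustrative sketch (not part of vmstat.c): the
 * unusable_free_index() arithmetic on made-up numbers. With 1000 free
 * pages of which 32 blocks are order-4 or larger, 32 << 4 = 512 pages can
 * serve an order-4 request, so (1000 - 512) * 1000 / 1000 = 488, printed
 * as "0.488" (48.8% of free memory unusable at that order).
 */
#include <stdio.h>

int main(void)
{
	unsigned long long free_pages = 1000;		/* assumed */
	unsigned long long free_blocks_suitable = 32;	/* assumed */
	unsigned int order = 4;
	unsigned long long index;

	index = (free_pages - (free_blocks_suitable << order)) * 1000ULL
			/ free_pages;
	printf("%llu.%03llu\n", index / 1000, index % 1000);	/* 0.488 */
	return 0;
}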
2225                                                  2030 
2226 static void unusable_show_print(struct seq_file *m,    2031 static void unusable_show_print(struct seq_file *m,
2227                                         pg_data_t *pgdat, struct zone *zone)    2032                                         pg_data_t *pgdat, struct zone *zone)
2228 {                                                2033 {
2229         unsigned int order;                      2034         unsigned int order;
2230         int index;                               2035         int index;
2231         struct contig_page_info info;            2036         struct contig_page_info info;
2232                                                  2037 
2233         seq_printf(m, "Node %d, zone %8s ",      2038         seq_printf(m, "Node %d, zone %8s ",
2234                                 pgdat->node_id,    2039                                 pgdat->node_id,
2235                                 zone->name);     2040                                 zone->name);
2236         for (order = 0; order < NR_PAGE_ORDERS; order++) {    !! 2041         for (order = 0; order < MAX_ORDER; ++order) {
2237                 fill_contig_page_info(zone, order, &info);    2042                 fill_contig_page_info(zone, order, &info);
2238                 index = unusable_free_index(order, &info);    2043                 index = unusable_free_index(order, &info);
2239                 seq_printf(m, "%d.%03d ", index / 1000, index % 1000);    2044                 seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
2240         }                                        2045         }
2241                                                  2046 
2242         seq_putc(m, '\n');                       2047         seq_putc(m, '\n');
2243 }                                                2048 }
2244                                                  2049 
2245 /*                                               2050 /*
2246  * Display unusable free space index             2051  * Display unusable free space index
2247  *                                               2052  *
2248  * The unusable free space index measures how much of the available free    2053  * The unusable free space index measures how much of the available free
2249  * memory cannot be used to satisfy an allocation of a given size and is a    2054  * memory cannot be used to satisfy an allocation of a given size and is a
2250  * value between 0 and 1. The higher the value, the more of free memory is    2055  * value between 0 and 1. The higher the value, the more of free memory is
2251  * unusable and by implication, the worse the external fragmentation is. This    2056  * unusable and by implication, the worse the external fragmentation is. This
2252  * can be expressed as a percentage by multiplying by 100.    2057  * can be expressed as a percentage by multiplying by 100.
2253  */                                              2058  */
2254 static int unusable_show(struct seq_file *m, void *arg)    2059 static int unusable_show(struct seq_file *m, void *arg)
2255 {                                                2060 {
2256         pg_data_t *pgdat = (pg_data_t *)arg;     2061         pg_data_t *pgdat = (pg_data_t *)arg;
2257                                                  2062 
2258         /* check memoryless node */              2063         /* check memoryless node */
2259         if (!node_state(pgdat->node_id, N_MEMORY))    2064         if (!node_state(pgdat->node_id, N_MEMORY))
2260                 return 0;                        2065                 return 0;
2261                                                  2066 
2262         walk_zones_in_node(m, pgdat, true, false, unusable_show_print);    2067         walk_zones_in_node(m, pgdat, true, false, unusable_show_print);
2263                                                  2068 
2264         return 0;                                2069         return 0;
2265 }                                                2070 }
2266                                                  2071 
2267 static const struct seq_operations unusable_sops = {    2072 static const struct seq_operations unusable_sops = {
2268         .start  = frag_start,                    2073         .start  = frag_start,
2269         .next   = frag_next,                     2074         .next   = frag_next,
2270         .stop   = frag_stop,                     2075         .stop   = frag_stop,
2271         .show   = unusable_show,                 2076         .show   = unusable_show,
2272 };                                               2077 };
2273                                                  2078 
2274 DEFINE_SEQ_ATTRIBUTE(unusable);                  2079 DEFINE_SEQ_ATTRIBUTE(unusable);
2275                                                  2080 
2276 static void extfrag_show_print(struct seq_file *m,    2081 static void extfrag_show_print(struct seq_file *m,
2277                                         pg_data_t *pgdat, struct zone *zone)    2082                                         pg_data_t *pgdat, struct zone *zone)
2278 {                                                2083 {
2279         unsigned int order;                      2084         unsigned int order;
2280         int index;                               2085         int index;
2281                                                  2086 
2282         /* Alloc on stack as interrupts are disabled for zone walk */    2087         /* Alloc on stack as interrupts are disabled for zone walk */
2283         struct contig_page_info info;            2088         struct contig_page_info info;
2284                                                  2089 
2285         seq_printf(m, "Node %d, zone %8s ",      2090         seq_printf(m, "Node %d, zone %8s ",
2286                                 pgdat->node_id,    2091                                 pgdat->node_id,
2287                                 zone->name);     2092                                 zone->name);
2288         for (order = 0; order < NR_PAGE_ORDERS; order++) {    !! 2093         for (order = 0; order < MAX_ORDER; ++order) {
2289                 fill_contig_page_info(zone, order, &info);    2094                 fill_contig_page_info(zone, order, &info);
2290                 index = __fragmentation_index(order, &info);    2095                 index = __fragmentation_index(order, &info);
2291                 seq_printf(m, "%2d.%03d ", index / 1000, index % 1000);    !! 2096                 seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
2292         }                                        2097         }
2293                                                  2098 
2294         seq_putc(m, '\n');                       2099         seq_putc(m, '\n');
2295 }                                                2100 }
2296                                                  2101 
2297 /*                                               2102 /*
2298  * Display fragmentation index for orders that allocations would fail for    2103  * Display fragmentation index for orders that allocations would fail for
2299  */                                              2104  */
2300 static int extfrag_show(struct seq_file *m, void *arg)    2105 static int extfrag_show(struct seq_file *m, void *arg)
2301 {                                                2106 {
2302         pg_data_t *pgdat = (pg_data_t *)arg;     2107         pg_data_t *pgdat = (pg_data_t *)arg;
2303                                                  2108 
2304         walk_zones_in_node(m, pgdat, true, false, extfrag_show_print);    2109         walk_zones_in_node(m, pgdat, true, false, extfrag_show_print);
2305                                                  2110 
2306         return 0;                                2111         return 0;
2307 }                                                2112 }
2308                                                  2113 
2309 static const struct seq_operations extfrag_sops = {    2114 static const struct seq_operations extfrag_sops = {
2310         .start  = frag_start,                    2115         .start  = frag_start,
2311         .next   = frag_next,                     2116         .next   = frag_next,
2312         .stop   = frag_stop,                     2117         .stop   = frag_stop,
2313         .show   = extfrag_show,                  2118         .show   = extfrag_show,
2314 };                                               2119 };
2315                                                  2120 
2316 DEFINE_SEQ_ATTRIBUTE(extfrag);                   2121 DEFINE_SEQ_ATTRIBUTE(extfrag);
2317                                                  2122 
2318 static int __init extfrag_debug_init(void)       2123 static int __init extfrag_debug_init(void)
2319 {                                                2124 {
2320         struct dentry *extfrag_debug_root;       2125         struct dentry *extfrag_debug_root;
2321                                                  2126 
2322         extfrag_debug_root = debugfs_create_dir("extfrag", NULL);    2127         extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
2323                                                  2128 
2324         debugfs_create_file("unusable_index", 0444, extfrag_debug_root, NULL,    2129         debugfs_create_file("unusable_index", 0444, extfrag_debug_root, NULL,
2325                             &unusable_fops);     2130                             &unusable_fops);
2326                                                  2131 
2327         debugfs_create_file("extfrag_index", 0444, extfrag_debug_root, NULL,    2132         debugfs_create_file("extfrag_index", 0444, extfrag_debug_root, NULL,
2328                             &extfrag_fops);      2133                             &extfrag_fops);
2329                                                  2134 
2330         return 0;                                2135         return 0;
2331 }                                                2136 }
2332                                                  2137 
2333 module_init(extfrag_debug_init);                 2138 module_init(extfrag_debug_init);
2334                                               << 
2335 #endif                                           2139 #endif
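
/*
 * Editor's illustrative sketch (not part of vmstat.c): reading the debugfs
 * files registered above. Assumes debugfs is mounted at /sys/kernel/debug,
 * CONFIG_COMPACTION is enabled, and the reader runs as root. Each row
 * follows the seq_printf format in unusable_show_print(), e.g.
 * "Node 0, zone   Normal 0.000 0.104 0.371 ..." (sample values made up).
 */
#include <stdio.h>

int main(void)
{
	char line[512];
	FILE *f = fopen("/sys/kernel/debug/extfrag/unusable_index", "r");

	if (!f) {
		perror("open unusable_index (root and mounted debugfs required)");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}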
2336                                                  2140 
