~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/s390/kernel/hiperdispatch.c

Version: ~ [ linux-6.12-rc7 ] ~ [ linux-6.11.7 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.60 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.116 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.171 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.229 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.285 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.323 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.12 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

Diff markup

Differences between /arch/s390/kernel/hiperdispatch.c (Version linux-6.12-rc7) and /arch/mips/kernel/hiperdispatch.c (Version linux-2.6.32.71)


  1 // SPDX-License-Identifier: GPL-2.0                 1 
  2 /*                                                
  3  * Copyright IBM Corp. 2024                       
  4  */                                               
  5                                                   
  6 #define KMSG_COMPONENT "hd"                       
  7 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt       
  8                                                   
  9 /*                                                
 10  * Hiperdispatch:                                 
 11  * Dynamically calculates the optimum number o    
 12  * by considering the state the system is in.     
 13  * that a capacity update is necessary, it sch    
 14  * During topology updates the CPU capacities     
 15  *                                                
 16  * There are two places where CPU capacities ar   
 17  * hiperdispatch.                                 
 18  * -> hiperdispatch's recurring work function     
 19  *    determine high capacity CPU count.          
 20  * -> during a topology update hiperdispatch's    
 21  *    updates CPU capacities.                     
 22  * These two can run on different CPUs in para    
 23  * hiperdispatch to make wrong decisions. This    
 24  * some overhead by leading to extra rebuild_s    
 25  * for correction. Access to capacities within    
 26  * serialized to prevent the overhead.            
 27  *                                                
 28  * Hiperdispatch decision making revolves arou    
 29  * HD_STEAL_THRESHOLD value is taken as refere    
 30  * crosses the threshold value hiperdispatch f    
 31  * capacities to entitled CPUs. When steal tim    
 32  * threshold boundary, hiperdispatch utilizes     
 33  * of them high capacity.                         
 34  *                                                
 35  * The theory behind HD_STEAL_THRESHOLD is rel    
 36  * performance. Comparing the throughput of:      
 37  * - single CORE, with N threads, running N ta    
 38  * - N separate COREs running N tasks,            
 39  * using individual COREs for individual tasks    
 40  * performance. This performance difference is    
 41  * between machine generations)                   
 42  *                                                
 43  * Hiperdispatch tries to hint scheduler to us    
 44  * each task, as long as steal time on those C    
 45  * therefore delaying the throughput loss caus    
 46  */                                               
 47                                                   
 48 #include <linux/cpumask.h>                        
 49 #include <linux/debugfs.h>                        
 50 #include <linux/device.h>                         
 51 #include <linux/kernel_stat.h>                    
 52 #include <linux/kstrtox.h>                        
 53 #include <linux/ktime.h>                          
 54 #include <linux/sysctl.h>                         
 55 #include <linux/types.h>                          
 56 #include <linux/workqueue.h>                      
 57 #include <asm/hiperdispatch.h>                    
 58 #include <asm/setup.h>                            
 59 #include <asm/smp.h>                              
 60 #include <asm/topology.h>                         
 61                                                   
 62 #define CREATE_TRACE_POINTS                       
 63 #include <asm/trace/hiperdispatch.h>              
 64                                                   
 65 #define HD_DELAY_FACTOR                 (4)       
 66 #define HD_DELAY_INTERVAL               (HZ /     
 67 #define HD_STEAL_THRESHOLD              30        
 68 #define HD_STEAL_AVG_WEIGHT             16        
 69                                                   
 70 static cpumask_t hd_vl_coremask;        /* Mas    
 71 static cpumask_t hd_vmvl_cpumask;       /* Mas    
 72 static int hd_high_capacity_cores;      /* Cur    
 73 static int hd_entitled_cores;           /* Tot    
 74 static int hd_online_cores;             /* Cur    
 75                                                   
 76 static unsigned long hd_previous_steal; /* Pre    
 77 static unsigned long hd_high_time;      /* Tot    
 78 static unsigned long hd_low_time;       /* Tot    
 79 static atomic64_t hd_adjustments;       /* Tot    
 80                                                   
 81 static unsigned int hd_steal_threshold = HD_ST    
 82 static unsigned int hd_delay_factor = HD_DELAY    
 83 static int hd_enabled;                            
 84                                                   
 85 static void hd_capacity_work_fn(struct work_st    
 86 static DECLARE_DELAYED_WORK(hd_capacity_work,     
 87                                                   
 88 static int hd_set_hiperdispatch_mode(int enabl    
 89 {                                                 
 90         if (!MACHINE_HAS_TOPOLOGY)                
 91                 enable = 0;                       
 92         if (hd_enabled == enable)                 
 93                 return 0;                         
 94         hd_enabled = enable;                      
 95         return 1;                                 
 96 }                                                 
 97                                                   
 98 void hd_reset_state(void)                         
 99 {                                                 
100         cpumask_clear(&hd_vl_coremask);           
101         cpumask_clear(&hd_vmvl_cpumask);          
102         hd_entitled_cores = 0;                    
103         hd_online_cores = 0;                      
104 }                                                 
105                                                   
106 void hd_add_core(int cpu)                         
107 {                                                 
108         const struct cpumask *siblings;           
109         int polarization;                         
110                                                   
111         hd_online_cores++;                        
112         polarization = smp_cpu_get_polarizatio    
113         siblings = topology_sibling_cpumask(cp    
114         switch (polarization) {                   
115         case POLARIZATION_VH:                     
116                 hd_entitled_cores++;              
117                 break;                            
118         case POLARIZATION_VM:                     
119                 hd_entitled_cores++;              
120                 cpumask_or(&hd_vmvl_cpumask, &    
121                 break;                            
122         case POLARIZATION_VL:                     
123                 cpumask_set_cpu(cpu, &hd_vl_co    
124                 cpumask_or(&hd_vmvl_cpumask, &    
125                 break;                            
126         }                                         
127 }                                                 
128                                                   
129 /* Serialize update and read operations of deb    
130 static DEFINE_MUTEX(hd_counter_mutex);            
131                                                   
132 static void hd_update_times(void)                 
133 {                                                 
134         static ktime_t prev;                      
135         ktime_t now;                              
136                                                   
137         /*                                        
138          * Check if hiperdispatch is active, i    
139          * This way it is possible to differen    
140          * enabling hiperdispatch.                
141          */                                       
142         if (hd_entitled_cores == 0 || hd_enabl    
143                 prev = ktime_set(0, 0);           
144                 return;                           
145         }                                         
146         now = ktime_get();                        
147         if (ktime_after(prev, 0)) {               
148                 if (hd_high_capacity_cores ==     
149                         hd_high_time += ktime_    
150                 else                              
151                         hd_low_time += ktime_m    
152         }                                         
153         prev = now;                               
154 }                                                 
155                                                   
156 static void hd_update_capacities(void)            
157 {                                                 
158         int cpu, upscaling_cores;                 
159         unsigned long capacity;                   
160                                                   
161         upscaling_cores = hd_high_capacity_cor    
162         capacity = upscaling_cores > 0 ? CPU_C    
163         hd_high_capacity_cores = hd_entitled_c    
164         for_each_cpu(cpu, &hd_vl_coremask) {      
165                 smp_set_core_capacity(cpu, cap    
166                 if (capacity != CPU_CAPACITY_H    
167                         continue;                 
168                 hd_high_capacity_cores++;         
169                 upscaling_cores--;                
170                 if (upscaling_cores == 0)         
171                         capacity = CPU_CAPACIT    
172         }                                         
173 }                                                 
174                                                   
175 void hd_disable_hiperdispatch(void)               
176 {                                                 
177         cancel_delayed_work_sync(&hd_capacity_    
178         hd_high_capacity_cores = hd_online_cor    
179         hd_previous_steal = 0;                    
180 }                                                 
181                                                   
182 int hd_enable_hiperdispatch(void)                 
183 {                                                 
184         mutex_lock(&hd_counter_mutex);            
185         hd_update_times();                        
186         mutex_unlock(&hd_counter_mutex);          
187         if (hd_enabled == 0)                      
188                 return 0;                         
189         if (hd_entitled_cores == 0)               
190                 return 0;                         
191         if (hd_online_cores <= hd_entitled_cor    
192                 return 0;                         
193         mod_delayed_work(system_wq, &hd_capaci    
194         hd_update_capacities();                   
195         return 1;                                 
196 }                                                 
197                                                   
198 static unsigned long hd_steal_avg(unsigned lon    
199 {                                                 
200         static unsigned long steal;               
201                                                   
202         steal = (steal * (HD_STEAL_AVG_WEIGHT     
203         return steal;                             
204 }                                                 
205                                                   
206 static unsigned long hd_calculate_steal_percen    
207 {                                                 
208         unsigned long time_delta, steal_delta,    
209         static ktime_t prev;                      
210         int cpus, cpu;                            
211         ktime_t now;                              
212                                                   
213         cpus = 0;                                 
214         steal = 0;                                
215         percentage = 0;                           
216         for_each_cpu(cpu, &hd_vmvl_cpumask) {     
217                 steal += kcpustat_cpu(cpu).cpu    
218                 cpus++;                           
219         }                                         
220         /*                                        
221          * If there is no vertical medium and     
222          * is 0 as vertical high CPUs shouldn'    
223          */                                       
224         if (cpus == 0)                            
225                 return percentage;                
226         now = ktime_get();                        
227         time_delta = ktime_to_ns(ktime_sub(now    
228         if (steal > hd_previous_steal && hd_pr    
229                 steal_delta = (steal - hd_prev    
230                 percentage = steal_delta / cpu    
231         }                                         
232         hd_previous_steal = steal;                
233         prev = now;                               
234         return percentage;                        
235 }                                                 
236                                                   
237 static void hd_capacity_work_fn(struct work_st    
238 {                                                 
239         unsigned long steal_percentage, new_co    
240                                                   
241         mutex_lock(&smp_cpu_state_mutex);         
242         /*                                        
243          * If online cores are less or equal t    
244          * does not need to make any adjustmen    
245          * disable hiperdispatch.                 
246          * Normally this check is handled on t    
247          * unhotplug, topology and cpu mask up    
248          * order, causing hd_enable_hiperdispa    
249          */                                       
250         if (hd_online_cores <= hd_entitled_cor    
251                 topology_schedule_update();       
252                 mutex_unlock(&smp_cpu_state_mu    
253                 return;                           
254         }                                         
255         steal_percentage = hd_steal_avg(hd_cal    
256         if (steal_percentage < hd_steal_thresh    
257                 new_cores = hd_online_cores;      
258         else                                      
259                 new_cores = hd_entitled_cores;    
260         if (hd_high_capacity_cores != new_core    
261                 trace_s390_hd_rebuild_domains(    
262                 hd_high_capacity_cores = new_c    
263                 atomic64_inc(&hd_adjustments);    
264                 topology_schedule_update();       
265         }                                         
266         trace_s390_hd_work_fn(steal_percentage    
267         mutex_unlock(&smp_cpu_state_mutex);       
268         schedule_delayed_work(&hd_capacity_wor    
269 }                                                 
270                                                   
271 static int hiperdispatch_ctl_handler(const str    
272                                      void *buf    
273 {                                                 
274         int hiperdispatch;                        
275         int rc;                                   
276         struct ctl_table ctl_entry = {            
277                 .procname       = ctl->procnam    
278                 .data           = &hiperdispat    
279                 .maxlen         = sizeof(int),    
280                 .extra1         = SYSCTL_ZERO,    
281                 .extra2         = SYSCTL_ONE,     
282         };                                        
283                                                   
284         hiperdispatch = hd_enabled;               
285         rc = proc_douintvec_minmax(&ctl_entry,    
286         if (rc < 0 || !write)                     
287                 return rc;                        
288         mutex_lock(&smp_cpu_state_mutex);         
289         if (hd_set_hiperdispatch_mode(hiperdis    
290                 topology_schedule_update();       
291         mutex_unlock(&smp_cpu_state_mutex);       
292         return 0;                                 
293 }                                                 
294                                                   
295 static struct ctl_table hiperdispatch_ctl_tabl    
296         {                                         
297                 .procname       = "hiperdispat    
298                 .mode           = 0644,           
299                 .proc_handler   = hiperdispatc    
300         },                                        
301 };                                                
302                                                   
303 static ssize_t hd_steal_threshold_show(struct     
304                                        struct     
305                                        char *b    
306 {                                                 
307         return sysfs_emit(buf, "%u\n", hd_stea    
308 }                                                 
309                                                   
310 static ssize_t hd_steal_threshold_store(struct    
311                                         struct    
312                                         const     
313                                         size_t    
314 {                                                 
315         unsigned int val;                         
316         int rc;                                   
317                                                   
318         rc = kstrtouint(buf, 0, &val);            
319         if (rc)                                   
320                 return rc;                        
321         if (val > 100)                            
322                 return -ERANGE;                   
323         hd_steal_threshold = val;                 
324         return count;                             
325 }                                                 
326                                                   
327 static DEVICE_ATTR_RW(hd_steal_threshold);        
328                                                   
329 static ssize_t hd_delay_factor_show(struct dev    
330                                     struct dev    
331                                     char *buf)    
332 {                                                 
333         return sysfs_emit(buf, "%u\n", hd_dela    
334 }                                                 
335                                                   
336 static ssize_t hd_delay_factor_store(struct de    
337                                      struct de    
338                                      const cha    
339                                      size_t co    
340 {                                                 
341         unsigned int val;                         
342         int rc;                                   
343                                                   
344         rc = kstrtouint(buf, 0, &val);            
345         if (rc)                                   
346                 return rc;                        
347         if (!val)                                 
348                 return -ERANGE;                   
349         hd_delay_factor = val;                    
350         return count;                             
351 }                                                 
352                                                   
353 static DEVICE_ATTR_RW(hd_delay_factor);           
354                                                   
355 static struct attribute *hd_attrs[] = {           
356         &dev_attr_hd_steal_threshold.attr,        
357         &dev_attr_hd_delay_factor.attr,           
358         NULL,                                     
359 };                                                
360                                                   
361 static const struct attribute_group hd_attr_gr    
362         .name  = "hiperdispatch",                 
363         .attrs = hd_attrs,                        
364 };                                                
365                                                   
366 static int hd_greedy_time_get(void *unused, u6    
367 {                                                 
368         mutex_lock(&hd_counter_mutex);            
369         hd_update_times();                        
370         *val = hd_high_time;                      
371         mutex_unlock(&hd_counter_mutex);          
372         return 0;                                 
373 }                                                 
374                                                   
375 DEFINE_SIMPLE_ATTRIBUTE(hd_greedy_time_fops, h    
376                                                   
377 static int hd_conservative_time_get(void *unus    
378 {                                                 
379         mutex_lock(&hd_counter_mutex);            
380         hd_update_times();                        
381         *val = hd_low_time;                       
382         mutex_unlock(&hd_counter_mutex);          
383         return 0;                                 
384 }                                                 
385                                                   
386 DEFINE_SIMPLE_ATTRIBUTE(hd_conservative_time_f    
387                                                   
388 static int hd_adjustment_count_get(void *unuse    
389 {                                                 
390         *val = atomic64_read(&hd_adjustments);    
391         return 0;                                 
392 }                                                 
393                                                   
394 DEFINE_SIMPLE_ATTRIBUTE(hd_adjustments_fops, h    
395                                                   
396 static void __init hd_create_debugfs_counters(    
397 {                                                 
398         struct dentry *dir;                       
399                                                   
400         dir = debugfs_create_dir("hiperdispatc    
401         debugfs_create_file("conservative_time    
402         debugfs_create_file("greedy_time_ms",     
403         debugfs_create_file("adjustment_count"    
404 }                                                 
405                                                   
406 static void __init hd_create_attributes(void)     
407 {                                                 
408         struct device *dev;                       
409                                                   
410         dev = bus_get_dev_root(&cpu_subsys);      
411         if (!dev)                                 
412                 return;                           
413         if (sysfs_create_group(&dev->kobj, &hd    
414                 pr_warn("Unable to create hipe    
415         put_device(dev);                          
416 }                                                 
417                                                   
418 static int __init hd_init(void)                   
419 {                                                 
420         if (IS_ENABLED(CONFIG_HIPERDISPATCH_ON    
421                 hd_set_hiperdispatch_mode(1);     
422                 topology_schedule_update();       
423         }                                         
424         if (!register_sysctl("s390", hiperdisp    
425                 pr_warn("Failed to register s3    
426         hd_create_debugfs_counters();             
427         hd_create_attributes();                   
428         return 0;                                 
429 }                                                 
430 late_initcall(hd_init);                           
431                                                   

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php