TOMOYO Linux Cross Reference
Linux/arch/x86/kernel/cpu/resctrl/core.c


  1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*
  3  * Resource Director Technology(RDT)
  4  * - Cache Allocation code.
  5  *
  6  * Copyright (C) 2016 Intel Corporation
  7  *
  8  * Authors:
  9  *    Fenghua Yu <fenghua.yu@intel.com>
 10  *    Tony Luck <tony.luck@intel.com>
 11  *    Vikas Shivappa <vikas.shivappa@intel.com>
 12  *
 13  * More information about RDT can be found in the Intel (R) x86 Architecture
 14  * Software Developer Manual June 2016, volume 3, section 17.17.
 15  */
 16 
 17 #define pr_fmt(fmt)     "resctrl: " fmt
 18 
 19 #include <linux/cpu.h>
 20 #include <linux/slab.h>
 21 #include <linux/err.h>
 22 #include <linux/cpuhotplug.h>
 23 
 24 #include <asm/cpu_device_id.h>
 25 #include <asm/resctrl.h>
 26 #include "internal.h"
 27 
 28 /*
 29  * rdt_domain structures are kfree()d when their last CPU goes offline,
 30  * and allocated when the first CPU in a new domain comes online.
 31  * The rdt_resource's domain list is updated when this happens. Readers of
 32  * the domain list must either take cpus_read_lock(), or rely on an RCU
 33  * read-side critical section, to avoid observing concurrent modification.
 34  * All writers take this mutex:
 35  */
 36 static DEFINE_MUTEX(domain_list_lock);
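
/*
 * For example, a reader that cannot take cpus_read_lock() is expected to
 * use an RCU read-side critical section (illustrative sketch only; the
 * readers below, such as get_ctrl_domain_from_cpu(), instead rely on
 * lockdep_assert_cpus_held()):
 *
 *	rcu_read_lock();
 *	list_for_each_entry_rcu(d, &r->ctrl_domains, hdr.list)
 *		...;
 *	rcu_read_unlock();
 */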
 37 
 38 /*
 39  * The cached resctrl_pqr_state is strictly per CPU and can never be
 40  * updated from a remote CPU. Functions which modify the state
 41  * are called with interrupts disabled and no preemption, which
 42  * is sufficient for the protection.
 43  */
 44 DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state);
 45 
 46 /*
 47  * Used to store the max resource name width and max resource data width
 48  * to display the schemata in a tabular format
 49  */
 50 int max_name_width, max_data_width;
 51 
 52 /*
 53  * Global boolean for rdt_alloc which is true if any
 54  * resource allocation is enabled.
 55  */
 56 bool rdt_alloc_capable;
 57 
 58 static void mba_wrmsr_intel(struct msr_param *m);
 59 static void cat_wrmsr(struct msr_param *m);
 60 static void mba_wrmsr_amd(struct msr_param *m);
 61 
 62 #define ctrl_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.ctrl_domains)
 63 #define mon_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.mon_domains)
 64 
 65 struct rdt_hw_resource rdt_resources_all[] = {
 66         [RDT_RESOURCE_L3] =
 67         {
 68                 .r_resctrl = {
 69                         .rid                    = RDT_RESOURCE_L3,
 70                         .name                   = "L3",
 71                         .ctrl_scope             = RESCTRL_L3_CACHE,
 72                         .mon_scope              = RESCTRL_L3_CACHE,
 73                         .ctrl_domains           = ctrl_domain_init(RDT_RESOURCE_L3),
 74                         .mon_domains            = mon_domain_init(RDT_RESOURCE_L3),
 75                         .parse_ctrlval          = parse_cbm,
 76                         .format_str             = "%d=%0*x",
 77                         .fflags                 = RFTYPE_RES_CACHE,
 78                 },
 79                 .msr_base               = MSR_IA32_L3_CBM_BASE,
 80                 .msr_update             = cat_wrmsr,
 81         },
 82         [RDT_RESOURCE_L2] =
 83         {
 84                 .r_resctrl = {
 85                         .rid                    = RDT_RESOURCE_L2,
 86                         .name                   = "L2",
 87                         .ctrl_scope             = RESCTRL_L2_CACHE,
 88                         .ctrl_domains           = ctrl_domain_init(RDT_RESOURCE_L2),
 89                         .parse_ctrlval          = parse_cbm,
 90                         .format_str             = "%d=%0*x",
 91                         .fflags                 = RFTYPE_RES_CACHE,
 92                 },
 93                 .msr_base               = MSR_IA32_L2_CBM_BASE,
 94                 .msr_update             = cat_wrmsr,
 95         },
 96         [RDT_RESOURCE_MBA] =
 97         {
 98                 .r_resctrl = {
 99                         .rid                    = RDT_RESOURCE_MBA,
100                         .name                   = "MB",
101                         .ctrl_scope             = RESCTRL_L3_CACHE,
102                         .ctrl_domains           = ctrl_domain_init(RDT_RESOURCE_MBA),
103                         .parse_ctrlval          = parse_bw,
104                         .format_str             = "%d=%*u",
105                         .fflags                 = RFTYPE_RES_MB,
106                 },
107         },
108         [RDT_RESOURCE_SMBA] =
109         {
110                 .r_resctrl = {
111                         .rid                    = RDT_RESOURCE_SMBA,
112                         .name                   = "SMBA",
113                         .ctrl_scope             = RESCTRL_L3_CACHE,
114                         .ctrl_domains           = ctrl_domain_init(RDT_RESOURCE_SMBA),
115                         .parse_ctrlval          = parse_bw,
116                         .format_str             = "%d=%*u",
117                         .fflags                 = RFTYPE_RES_MB,
118                 },
119         },
120 };
121 
122 u32 resctrl_arch_system_num_rmid_idx(void)
123 {
124         struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
125 
 126         /* RMIDs are independent numbers for x86. num_rmid_idx == num_rmid */
127         return r->num_rmid;
128 }
129 
130 /*
 131  * cache_alloc_hsw_probe() - Have to probe for Intel Haswell server CPUs
132  * as they do not have CPUID enumeration support for Cache allocation.
133  * The check for Vendor/Family/Model is not enough to guarantee that
134  * the MSRs won't #GP fault because only the following SKUs support
135  * CAT:
136  *      Intel(R) Xeon(R)  CPU E5-2658  v3  @  2.20GHz
137  *      Intel(R) Xeon(R)  CPU E5-2648L v3  @  1.80GHz
138  *      Intel(R) Xeon(R)  CPU E5-2628L v3  @  2.00GHz
139  *      Intel(R) Xeon(R)  CPU E5-2618L v3  @  2.30GHz
140  *      Intel(R) Xeon(R)  CPU E5-2608L v3  @  2.00GHz
141  *      Intel(R) Xeon(R)  CPU E5-2658A v3  @  2.20GHz
142  *
143  * Probe by trying to write the first of the L3 cache mask registers
144  * and checking that the bits stick. Max CLOSids is always 4 and max cbm length
145  * is always 20 on hsw server parts. The minimum cache bitmask length
146  * allowed for HSW server is always 2 bits. Hardcode all of them.
147  */
148 static inline void cache_alloc_hsw_probe(void)
149 {
150         struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_L3];
151         struct rdt_resource *r  = &hw_res->r_resctrl;
152         u64 max_cbm = BIT_ULL_MASK(20) - 1, l3_cbm_0;
153 
154         if (wrmsrl_safe(MSR_IA32_L3_CBM_BASE, max_cbm))
155                 return;
156 
157         rdmsrl(MSR_IA32_L3_CBM_BASE, l3_cbm_0);
158 
159         /* If all the bits were set in MSR, return success */
160         if (l3_cbm_0 != max_cbm)
161                 return;
162 
163         hw_res->num_closid = 4;
164         r->default_ctrl = max_cbm;
165         r->cache.cbm_len = 20;
166         r->cache.shareable_bits = 0xc0000;
167         r->cache.min_cbm_bits = 2;
168         r->cache.arch_has_sparse_bitmasks = false;
169         r->alloc_capable = true;
170 
171         rdt_alloc_capable = true;
172 }
173 
174 bool is_mba_sc(struct rdt_resource *r)
175 {
176         if (!r)
177                 return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.mba_sc;
178 
179         /*
180          * The software controller support is only applicable to MBA resource.
181          * Make sure to check for resource type.
182          */
183         if (r->rid != RDT_RESOURCE_MBA)
184                 return false;
185 
186         return r->membw.mba_sc;
187 }
188 
189 /*
 190  * rdt_get_mb_table() - get a mapping between the bandwidth (b/w) percentage
 191  * values exposed to the user interface and the h/w understandable delay
 192  * values.
 193  *
 194  * The non-linear delay values have power-of-two granularity and the h/w
 195  * does not guarantee a curve of configured delay values vs. the actual
 196  * b/w enforced. Hence a pre-calibrated mapping is needed so the user can
 197  * express the memory b/w as a percentage value.
198  */
199 static inline bool rdt_get_mb_table(struct rdt_resource *r)
200 {
201         /*
 202          * There are currently no Intel SKUs that support non-linear delay.
203          */
 204         pr_info("MBA b/w map not implemented for cpu:%d, model:%d\n",
205                 boot_cpu_data.x86, boot_cpu_data.x86_model);
206 
207         return false;
208 }
209 
210 static __init bool __get_mem_config_intel(struct rdt_resource *r)
211 {
212         struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
213         union cpuid_0x10_3_eax eax;
214         union cpuid_0x10_x_edx edx;
215         u32 ebx, ecx, max_delay;
216 
217         cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
218         hw_res->num_closid = edx.split.cos_max + 1;
219         max_delay = eax.split.max_delay + 1;
220         r->default_ctrl = MAX_MBA_BW;
221         r->membw.arch_needs_linear = true;
222         if (ecx & MBA_IS_LINEAR) {
223                 r->membw.delay_linear = true;
224                 r->membw.min_bw = MAX_MBA_BW - max_delay;
225                 r->membw.bw_gran = MAX_MBA_BW - max_delay;
226         } else {
227                 if (!rdt_get_mb_table(r))
228                         return false;
229                 r->membw.arch_needs_linear = false;
230         }
231         r->data_width = 3;
232 
233         if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA))
234                 r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD;
235         else
236                 r->membw.throttle_mode = THREAD_THROTTLE_MAX;
237         thread_throttle_mode_init();
238 
239         r->alloc_capable = true;
240 
241         return true;
242 }
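
/*
 * Illustrative numbers only: if the max_delay value computed above is 90
 * and MBA_IS_LINEAR is set, then (assuming MAX_MBA_BW is 100) min_bw and
 * bw_gran both become 100 - 90 = 10, i.e. memory b/w can be requested in
 * 10% steps from 10% up to 100%.
 */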
243 
244 static __init bool __rdt_get_mem_config_amd(struct rdt_resource *r)
245 {
246         struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
247         u32 eax, ebx, ecx, edx, subleaf;
248 
249         /*
250          * Query CPUID_Fn80000020_EDX_x01 for MBA and
251          * CPUID_Fn80000020_EDX_x02 for SMBA
252          */
253         subleaf = (r->rid == RDT_RESOURCE_SMBA) ? 2 :  1;
254 
255         cpuid_count(0x80000020, subleaf, &eax, &ebx, &ecx, &edx);
256         hw_res->num_closid = edx + 1;
257         r->default_ctrl = 1 << eax;
258 
259         /* AMD does not use delay */
260         r->membw.delay_linear = false;
261         r->membw.arch_needs_linear = false;
262 
263         /*
 264          * AMD does not use the memory delay throttle model that Intel
 265          * uses to control the allocation.
266          */
267         r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
268         r->membw.min_bw = 0;
269         r->membw.bw_gran = 1;
 270         /* Max value is 2048, so the data width is 4 decimal digits */
271         r->data_width = 4;
272 
273         r->alloc_capable = true;
274 
275         return true;
276 }
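
/*
 * Illustrative numbers only: an EAX value of 11 from the CPUID leaf above
 * gives default_ctrl = 1 << 11 = 2048 (the maximum bandwidth value noted
 * in the data width comment), with requests made in steps of bw_gran = 1
 * starting from min_bw = 0.
 */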
277 
278 static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
279 {
280         struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
281         union cpuid_0x10_1_eax eax;
282         union cpuid_0x10_x_ecx ecx;
283         union cpuid_0x10_x_edx edx;
284         u32 ebx;
285 
286         cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx.full, &edx.full);
287         hw_res->num_closid = edx.split.cos_max + 1;
288         r->cache.cbm_len = eax.split.cbm_len + 1;
289         r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
290         r->cache.shareable_bits = ebx & r->default_ctrl;
291         r->data_width = (r->cache.cbm_len + 3) / 4;
292         if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
293                 r->cache.arch_has_sparse_bitmasks = ecx.split.noncont;
294         r->alloc_capable = true;
295 }
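
/*
 * Illustrative numbers only: a cache reporting a 20-bit capacity bitmask
 * (CPUID cbm_len field of 19) ends up with cache.cbm_len = 20,
 * default_ctrl = 0xfffff and data_width = (20 + 3) / 4 = 5 hex digits.
 */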
296 
297 static void rdt_get_cdp_config(int level)
298 {
299         /*
 300          * By default, CDP is disabled. CDP can be enabled by the "cdp"
 301          * mount parameter when the resctrl filesystem is mounted.
302          */
303         rdt_resources_all[level].cdp_enabled = false;
304         rdt_resources_all[level].r_resctrl.cdp_capable = true;
305 }
306 
307 static void rdt_get_cdp_l3_config(void)
308 {
309         rdt_get_cdp_config(RDT_RESOURCE_L3);
310 }
311 
312 static void rdt_get_cdp_l2_config(void)
313 {
314         rdt_get_cdp_config(RDT_RESOURCE_L2);
315 }
316 
317 static void mba_wrmsr_amd(struct msr_param *m)
318 {
319         struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
320         struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
321         unsigned int i;
322 
323         for (i = m->low; i < m->high; i++)
324                 wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
325 }
326 
327 /*
328  * Map the memory b/w percentage value to delay values
329  * that can be written to QOS_MSRs.
 330  * There are currently no SKUs which support non-linear delay values.
331  */
332 static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
333 {
334         if (r->membw.delay_linear)
335                 return MAX_MBA_BW - bw;
336 
337         pr_warn_once("Non Linear delay-bw map not supported but queried\n");
338         return r->default_ctrl;
339 }
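
/*
 * Illustrative numbers only: with linear delay and MAX_MBA_BW assumed to
 * be 100, a requested bandwidth of 90% is written to the MSR as the
 * delay value 100 - 90 = 10.
 */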
340 
341 static void mba_wrmsr_intel(struct msr_param *m)
342 {
343         struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
344         struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
345         unsigned int i;
346 
 347         /* Write the delay values for MBA. */
348         for (i = m->low; i < m->high; i++)
349                 wrmsrl(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], m->res));
350 }
351 
352 static void cat_wrmsr(struct msr_param *m)
353 {
354         struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
355         struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
356         unsigned int i;
357 
358         for (i = m->low; i < m->high; i++)
359                 wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
360 }
361 
362 struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r)
363 {
364         struct rdt_ctrl_domain *d;
365 
366         lockdep_assert_cpus_held();
367 
368         list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
369                 /* Find the domain that contains this CPU */
370                 if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
371                         return d;
372         }
373 
374         return NULL;
375 }
376 
377 struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r)
378 {
379         struct rdt_mon_domain *d;
380 
381         lockdep_assert_cpus_held();
382 
383         list_for_each_entry(d, &r->mon_domains, hdr.list) {
384                 /* Find the domain that contains this CPU */
385                 if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
386                         return d;
387         }
388 
389         return NULL;
390 }
391 
392 u32 resctrl_arch_get_num_closid(struct rdt_resource *r)
393 {
394         return resctrl_to_arch_res(r)->num_closid;
395 }
396 
397 void rdt_ctrl_update(void *arg)
398 {
399         struct rdt_hw_resource *hw_res;
400         struct msr_param *m = arg;
401 
402         hw_res = resctrl_to_arch_res(m->res);
403         hw_res->msr_update(m);
404 }
405 
406 /*
407  * rdt_find_domain - Search for a domain id in a resource domain list.
408  *
 409  * Search the domain list for the domain id. If found, return the
 410  * matching domain, otherwise return NULL. When the id is not found
 411  * (and NULL is returned), the first domain with an id bigger than the
 412  * input id can be returned to the caller via @pos.
413  */
414 struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id,
415                                        struct list_head **pos)
416 {
417         struct rdt_domain_hdr *d;
418         struct list_head *l;
419 
420         list_for_each(l, h) {
421                 d = list_entry(l, struct rdt_domain_hdr, list);
422                 /* When id is found, return its domain. */
423                 if (id == d->id)
424                         return d;
425                 /* Stop searching when finding id's position in sorted list. */
426                 if (id < d->id)
427                         break;
428         }
429 
430         if (pos)
431                 *pos = l;
432 
433         return NULL;
434 }
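
/*
 * For example, searching a list whose domains have ids {0, 2, 3} for id 1
 * returns NULL and points @pos at the id 2 entry, so a caller such as
 * domain_add_cpu_ctrl() keeps the list sorted by passing @pos to
 * list_add_tail_rcu() when it allocates the new domain.
 */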
435 
436 static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc)
437 {
438         struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
439         int i;
440 
441         /*
442          * Initialize the Control MSRs to having no control.
443          * For Cache Allocation: Set all bits in cbm
444          * For Memory Allocation: Set b/w requested to 100%
445          */
446         for (i = 0; i < hw_res->num_closid; i++, dc++)
447                 *dc = r->default_ctrl;
448 }
449 
450 static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom)
451 {
452         kfree(hw_dom->ctrl_val);
453         kfree(hw_dom);
454 }
455 
456 static void mon_domain_free(struct rdt_hw_mon_domain *hw_dom)
457 {
458         kfree(hw_dom->arch_mbm_total);
459         kfree(hw_dom->arch_mbm_local);
460         kfree(hw_dom);
461 }
462 
463 static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain *d)
464 {
465         struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d);
466         struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
467         struct msr_param m;
468         u32 *dc;
469 
470         dc = kmalloc_array(hw_res->num_closid, sizeof(*hw_dom->ctrl_val),
471                            GFP_KERNEL);
472         if (!dc)
473                 return -ENOMEM;
474 
475         hw_dom->ctrl_val = dc;
476         setup_default_ctrlval(r, dc);
477 
478         m.res = r;
479         m.dom = d;
480         m.low = 0;
481         m.high = hw_res->num_closid;
482         hw_res->msr_update(&m);
483         return 0;
484 }
485 
486 /**
487  * arch_domain_mbm_alloc() - Allocate arch private storage for the MBM counters
488  * @num_rmid:   The size of the MBM counter array
489  * @hw_dom:     The domain that owns the allocated arrays
490  */
491 static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom)
492 {
493         size_t tsize;
494 
495         if (is_mbm_total_enabled()) {
496                 tsize = sizeof(*hw_dom->arch_mbm_total);
497                 hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL);
498                 if (!hw_dom->arch_mbm_total)
499                         return -ENOMEM;
500         }
501         if (is_mbm_local_enabled()) {
502                 tsize = sizeof(*hw_dom->arch_mbm_local);
503                 hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL);
504                 if (!hw_dom->arch_mbm_local) {
505                         kfree(hw_dom->arch_mbm_total);
506                         hw_dom->arch_mbm_total = NULL;
507                         return -ENOMEM;
508                 }
509         }
510 
511         return 0;
512 }
513 
514 static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
515 {
516         switch (scope) {
517         case RESCTRL_L2_CACHE:
518         case RESCTRL_L3_CACHE:
519                 return get_cpu_cacheinfo_id(cpu, scope);
520         case RESCTRL_L3_NODE:
521                 return cpu_to_node(cpu);
522         default:
523                 break;
524         }
525 
526         return -EINVAL;
527 }
528 
529 static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r)
530 {
531         int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
532         struct rdt_hw_ctrl_domain *hw_dom;
533         struct list_head *add_pos = NULL;
534         struct rdt_domain_hdr *hdr;
535         struct rdt_ctrl_domain *d;
536         int err;
537 
538         lockdep_assert_held(&domain_list_lock);
539 
540         if (id < 0) {
541                 pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n",
542                              cpu, r->ctrl_scope, r->name);
543                 return;
544         }
545 
546         hdr = rdt_find_domain(&r->ctrl_domains, id, &add_pos);
547         if (hdr) {
548                 if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
549                         return;
550                 d = container_of(hdr, struct rdt_ctrl_domain, hdr);
551 
552                 cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
553                 if (r->cache.arch_has_per_cpu_cfg)
554                         rdt_domain_reconfigure_cdp(r);
555                 return;
556         }
557 
558         hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
559         if (!hw_dom)
560                 return;
561 
562         d = &hw_dom->d_resctrl;
563         d->hdr.id = id;
564         d->hdr.type = RESCTRL_CTRL_DOMAIN;
565         cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
566 
567         rdt_domain_reconfigure_cdp(r);
568 
569         if (domain_setup_ctrlval(r, d)) {
570                 ctrl_domain_free(hw_dom);
571                 return;
572         }
573 
574         list_add_tail_rcu(&d->hdr.list, add_pos);
575 
576         err = resctrl_online_ctrl_domain(r, d);
577         if (err) {
578                 list_del_rcu(&d->hdr.list);
579                 synchronize_rcu();
580                 ctrl_domain_free(hw_dom);
581         }
582 }
583 
584 static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
585 {
586         int id = get_domain_id_from_scope(cpu, r->mon_scope);
587         struct list_head *add_pos = NULL;
588         struct rdt_hw_mon_domain *hw_dom;
589         struct rdt_domain_hdr *hdr;
590         struct rdt_mon_domain *d;
591         int err;
592 
593         lockdep_assert_held(&domain_list_lock);
594 
595         if (id < 0) {
596                 pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
597                              cpu, r->mon_scope, r->name);
598                 return;
599         }
600 
601         hdr = rdt_find_domain(&r->mon_domains, id, &add_pos);
602         if (hdr) {
603                 if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
604                         return;
605                 d = container_of(hdr, struct rdt_mon_domain, hdr);
606 
607                 cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
608                 return;
609         }
610 
611         hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
612         if (!hw_dom)
613                 return;
614 
615         d = &hw_dom->d_resctrl;
616         d->hdr.id = id;
617         d->hdr.type = RESCTRL_MON_DOMAIN;
618         d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
619         if (!d->ci) {
620                 pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name);
621                 mon_domain_free(hw_dom);
622                 return;
623         }
624         cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
625 
626         arch_mon_domain_online(r, d);
627 
628         if (arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
629                 mon_domain_free(hw_dom);
630                 return;
631         }
632 
633         list_add_tail_rcu(&d->hdr.list, add_pos);
634 
635         err = resctrl_online_mon_domain(r, d);
636         if (err) {
637                 list_del_rcu(&d->hdr.list);
638                 synchronize_rcu();
639                 mon_domain_free(hw_dom);
640         }
641 }
642 
643 static void domain_add_cpu(int cpu, struct rdt_resource *r)
644 {
645         if (r->alloc_capable)
646                 domain_add_cpu_ctrl(cpu, r);
647         if (r->mon_capable)
648                 domain_add_cpu_mon(cpu, r);
649 }
650 
651 static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r)
652 {
653         int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
654         struct rdt_hw_ctrl_domain *hw_dom;
655         struct rdt_domain_hdr *hdr;
656         struct rdt_ctrl_domain *d;
657 
658         lockdep_assert_held(&domain_list_lock);
659 
660         if (id < 0) {
661                 pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n",
662                              cpu, r->ctrl_scope, r->name);
663                 return;
664         }
665 
666         hdr = rdt_find_domain(&r->ctrl_domains, id, NULL);
667         if (!hdr) {
668                 pr_warn("Can't find control domain for id=%d for CPU %d for resource %s\n",
669                         id, cpu, r->name);
670                 return;
671         }
672 
673         if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
674                 return;
675 
676         d = container_of(hdr, struct rdt_ctrl_domain, hdr);
677         hw_dom = resctrl_to_arch_ctrl_dom(d);
678 
679         cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
680         if (cpumask_empty(&d->hdr.cpu_mask)) {
681                 resctrl_offline_ctrl_domain(r, d);
682                 list_del_rcu(&d->hdr.list);
683                 synchronize_rcu();
684 
685                 /*
686                  * rdt_ctrl_domain "d" is going to be freed below, so clear
 687                  * its pointer from the pseudo_lock_region struct.
688                  */
689                 if (d->plr)
690                         d->plr->d = NULL;
691                 ctrl_domain_free(hw_dom);
692 
693                 return;
694         }
695 }
696 
697 static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r)
698 {
699         int id = get_domain_id_from_scope(cpu, r->mon_scope);
700         struct rdt_hw_mon_domain *hw_dom;
701         struct rdt_domain_hdr *hdr;
702         struct rdt_mon_domain *d;
703 
704         lockdep_assert_held(&domain_list_lock);
705 
706         if (id < 0) {
707                 pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
708                              cpu, r->mon_scope, r->name);
709                 return;
710         }
711 
712         hdr = rdt_find_domain(&r->mon_domains, id, NULL);
713         if (!hdr) {
714                 pr_warn("Can't find monitor domain for id=%d for CPU %d for resource %s\n",
715                         id, cpu, r->name);
716                 return;
717         }
718 
719         if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
720                 return;
721 
722         d = container_of(hdr, struct rdt_mon_domain, hdr);
723         hw_dom = resctrl_to_arch_mon_dom(d);
724 
725         cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
726         if (cpumask_empty(&d->hdr.cpu_mask)) {
727                 resctrl_offline_mon_domain(r, d);
728                 list_del_rcu(&d->hdr.list);
729                 synchronize_rcu();
730                 mon_domain_free(hw_dom);
731 
732                 return;
733         }
734 }
735 
736 static void domain_remove_cpu(int cpu, struct rdt_resource *r)
737 {
738         if (r->alloc_capable)
739                 domain_remove_cpu_ctrl(cpu, r);
740         if (r->mon_capable)
741                 domain_remove_cpu_mon(cpu, r);
742 }
743 
744 static void clear_closid_rmid(int cpu)
745 {
746         struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
747 
748         state->default_closid = RESCTRL_RESERVED_CLOSID;
749         state->default_rmid = RESCTRL_RESERVED_RMID;
750         state->cur_closid = RESCTRL_RESERVED_CLOSID;
751         state->cur_rmid = RESCTRL_RESERVED_RMID;
752         wrmsr(MSR_IA32_PQR_ASSOC, RESCTRL_RESERVED_RMID,
753               RESCTRL_RESERVED_CLOSID);
754 }
755 
756 static int resctrl_arch_online_cpu(unsigned int cpu)
757 {
758         struct rdt_resource *r;
759 
760         mutex_lock(&domain_list_lock);
761         for_each_capable_rdt_resource(r)
762                 domain_add_cpu(cpu, r);
763         mutex_unlock(&domain_list_lock);
764 
765         clear_closid_rmid(cpu);
766         resctrl_online_cpu(cpu);
767 
768         return 0;
769 }
770 
771 static int resctrl_arch_offline_cpu(unsigned int cpu)
772 {
773         struct rdt_resource *r;
774 
775         resctrl_offline_cpu(cpu);
776 
777         mutex_lock(&domain_list_lock);
778         for_each_capable_rdt_resource(r)
779                 domain_remove_cpu(cpu, r);
780         mutex_unlock(&domain_list_lock);
781 
782         clear_closid_rmid(cpu);
783 
784         return 0;
785 }
786 
787 /*
788  * Choose a width for the resource name and resource data based on the
 789  * resource that has the widest name and cbm.
790  */
791 static __init void rdt_init_padding(void)
792 {
793         struct rdt_resource *r;
794 
795         for_each_alloc_capable_rdt_resource(r) {
796                 if (r->data_width > max_data_width)
797                         max_data_width = r->data_width;
798         }
799 }
800 
801 enum {
802         RDT_FLAG_CMT,
803         RDT_FLAG_MBM_TOTAL,
804         RDT_FLAG_MBM_LOCAL,
805         RDT_FLAG_L3_CAT,
806         RDT_FLAG_L3_CDP,
807         RDT_FLAG_L2_CAT,
808         RDT_FLAG_L2_CDP,
809         RDT_FLAG_MBA,
810         RDT_FLAG_SMBA,
811         RDT_FLAG_BMEC,
812 };
813 
814 #define RDT_OPT(idx, n, f)      \
815 [idx] = {                       \
816         .name = n,              \
817         .flag = f               \
818 }
819 
820 struct rdt_options {
821         char    *name;
822         int     flag;
823         bool    force_off, force_on;
824 };
825 
826 static struct rdt_options rdt_options[]  __initdata = {
827         RDT_OPT(RDT_FLAG_CMT,       "cmt",      X86_FEATURE_CQM_OCCUP_LLC),
828         RDT_OPT(RDT_FLAG_MBM_TOTAL, "mbmtotal", X86_FEATURE_CQM_MBM_TOTAL),
829         RDT_OPT(RDT_FLAG_MBM_LOCAL, "mbmlocal", X86_FEATURE_CQM_MBM_LOCAL),
830         RDT_OPT(RDT_FLAG_L3_CAT,    "l3cat",    X86_FEATURE_CAT_L3),
831         RDT_OPT(RDT_FLAG_L3_CDP,    "l3cdp",    X86_FEATURE_CDP_L3),
832         RDT_OPT(RDT_FLAG_L2_CAT,    "l2cat",    X86_FEATURE_CAT_L2),
833         RDT_OPT(RDT_FLAG_L2_CDP,    "l2cdp",    X86_FEATURE_CDP_L2),
834         RDT_OPT(RDT_FLAG_MBA,       "mba",      X86_FEATURE_MBA),
835         RDT_OPT(RDT_FLAG_SMBA,      "smba",     X86_FEATURE_SMBA),
836         RDT_OPT(RDT_FLAG_BMEC,      "bmec",     X86_FEATURE_BMEC),
837 };
838 #define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options)
839 
840 static int __init set_rdt_options(char *str)
841 {
842         struct rdt_options *o;
843         bool force_off;
844         char *tok;
845 
846         if (*str == '=')
847                 str++;
848         while ((tok = strsep(&str, ",")) != NULL) {
849                 force_off = *tok == '!';
850                 if (force_off)
851                         tok++;
852                 for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
853                         if (strcmp(tok, o->name) == 0) {
854                                 if (force_off)
855                                         o->force_off = true;
856                                 else
857                                         o->force_on = true;
858                                 break;
859                         }
860                 }
861         }
862         return 1;
863 }
864 __setup("rdt", set_rdt_options);
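
/*
 * For example, booting with "rdt=cmt,!mba" marks the "cmt" option as
 * forced on and the "mba" option as forced off: a leading '!' sets
 * force_off for that option, anything else sets force_on.
 */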
865 
866 bool __init rdt_cpu_has(int flag)
867 {
868         bool ret = boot_cpu_has(flag);
869         struct rdt_options *o;
870 
871         if (!ret)
872                 return ret;
873 
874         for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
875                 if (flag == o->flag) {
876                         if (o->force_off)
877                                 ret = false;
878                         if (o->force_on)
879                                 ret = true;
880                         break;
881                 }
882         }
883         return ret;
884 }
885 
886 static __init bool get_mem_config(void)
887 {
888         struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_MBA];
889 
890         if (!rdt_cpu_has(X86_FEATURE_MBA))
891                 return false;
892 
893         if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
894                 return __get_mem_config_intel(&hw_res->r_resctrl);
895         else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
896                 return __rdt_get_mem_config_amd(&hw_res->r_resctrl);
897 
898         return false;
899 }
900 
901 static __init bool get_slow_mem_config(void)
902 {
903         struct rdt_hw_resource *hw_res = &rdt_resources_all[RDT_RESOURCE_SMBA];
904 
905         if (!rdt_cpu_has(X86_FEATURE_SMBA))
906                 return false;
907 
908         if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
909                 return __rdt_get_mem_config_amd(&hw_res->r_resctrl);
910 
911         return false;
912 }
913 
914 static __init bool get_rdt_alloc_resources(void)
915 {
916         struct rdt_resource *r;
917         bool ret = false;
918 
919         if (rdt_alloc_capable)
920                 return true;
921 
922         if (!boot_cpu_has(X86_FEATURE_RDT_A))
923                 return false;
924 
925         if (rdt_cpu_has(X86_FEATURE_CAT_L3)) {
926                 r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
927                 rdt_get_cache_alloc_cfg(1, r);
928                 if (rdt_cpu_has(X86_FEATURE_CDP_L3))
929                         rdt_get_cdp_l3_config();
930                 ret = true;
931         }
932         if (rdt_cpu_has(X86_FEATURE_CAT_L2)) {
 933                 /* CPUID 0x10.2 fields have the same format as 0x10.1 */
934                 r = &rdt_resources_all[RDT_RESOURCE_L2].r_resctrl;
935                 rdt_get_cache_alloc_cfg(2, r);
936                 if (rdt_cpu_has(X86_FEATURE_CDP_L2))
937                         rdt_get_cdp_l2_config();
938                 ret = true;
939         }
940 
941         if (get_mem_config())
942                 ret = true;
943 
944         if (get_slow_mem_config())
945                 ret = true;
946 
947         return ret;
948 }
949 
950 static __init bool get_rdt_mon_resources(void)
951 {
952         struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
953 
954         if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
955                 rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
956         if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL))
957                 rdt_mon_features |= (1 << QOS_L3_MBM_TOTAL_EVENT_ID);
958         if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))
959                 rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID);
960 
961         if (!rdt_mon_features)
962                 return false;
963 
964         return !rdt_get_mon_l3_config(r);
965 }
966 
967 static __init void __check_quirks_intel(void)
968 {
969         switch (boot_cpu_data.x86_vfm) {
970         case INTEL_HASWELL_X:
971                 if (!rdt_options[RDT_FLAG_L3_CAT].force_off)
972                         cache_alloc_hsw_probe();
973                 break;
974         case INTEL_SKYLAKE_X:
975                 if (boot_cpu_data.x86_stepping <= 4)
976                         set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
977                 else
978                         set_rdt_options("!l3cat");
979                 fallthrough;
980         case INTEL_BROADWELL_X:
981                 intel_rdt_mbm_apply_quirk();
982                 break;
983         }
984 }
985 
986 static __init void check_quirks(void)
987 {
988         if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
989                 __check_quirks_intel();
990 }
991 
992 static __init bool get_rdt_resources(void)
993 {
994         rdt_alloc_capable = get_rdt_alloc_resources();
995         rdt_mon_capable = get_rdt_mon_resources();
996 
997         return (rdt_mon_capable || rdt_alloc_capable);
998 }
999 
1000 static __init void rdt_init_res_defs_intel(void)
1001 {
1002         struct rdt_hw_resource *hw_res;
1003         struct rdt_resource *r;
1004 
1005         for_each_rdt_resource(r) {
1006                 hw_res = resctrl_to_arch_res(r);
1007 
1008                 if (r->rid == RDT_RESOURCE_L3 ||
1009                     r->rid == RDT_RESOURCE_L2) {
1010                         r->cache.arch_has_per_cpu_cfg = false;
1011                         r->cache.min_cbm_bits = 1;
1012                 } else if (r->rid == RDT_RESOURCE_MBA) {
1013                         hw_res->msr_base = MSR_IA32_MBA_THRTL_BASE;
1014                         hw_res->msr_update = mba_wrmsr_intel;
1015                 }
1016         }
1017 }
1018 
1019 static __init void rdt_init_res_defs_amd(void)
1020 {
1021         struct rdt_hw_resource *hw_res;
1022         struct rdt_resource *r;
1023 
1024         for_each_rdt_resource(r) {
1025                 hw_res = resctrl_to_arch_res(r);
1026 
1027                 if (r->rid == RDT_RESOURCE_L3 ||
1028                     r->rid == RDT_RESOURCE_L2) {
1029                         r->cache.arch_has_sparse_bitmasks = true;
1030                         r->cache.arch_has_per_cpu_cfg = true;
1031                         r->cache.min_cbm_bits = 0;
1032                 } else if (r->rid == RDT_RESOURCE_MBA) {
1033                         hw_res->msr_base = MSR_IA32_MBA_BW_BASE;
1034                         hw_res->msr_update = mba_wrmsr_amd;
1035                 } else if (r->rid == RDT_RESOURCE_SMBA) {
1036                         hw_res->msr_base = MSR_IA32_SMBA_BW_BASE;
1037                         hw_res->msr_update = mba_wrmsr_amd;
1038                 }
1039         }
1040 }
1041 
1042 static __init void rdt_init_res_defs(void)
1043 {
1044         if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
1045                 rdt_init_res_defs_intel();
1046         else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
1047                 rdt_init_res_defs_amd();
1048 }
1049 
1050 static enum cpuhp_state rdt_online;
1051 
1052 /* Runs once on the BSP during boot. */
1053 void resctrl_cpu_detect(struct cpuinfo_x86 *c)
1054 {
1055         if (!cpu_has(c, X86_FEATURE_CQM_LLC)) {
1056                 c->x86_cache_max_rmid  = -1;
1057                 c->x86_cache_occ_scale = -1;
1058                 c->x86_cache_mbm_width_offset = -1;
1059                 return;
1060         }
1061 
1062         /* will be overridden if occupancy monitoring exists */
1063         c->x86_cache_max_rmid = cpuid_ebx(0xf);
1064 
1065         if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) ||
1066             cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) ||
1067             cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) {
1068                 u32 eax, ebx, ecx, edx;
1069 
1070                 /* QoS sub-leaf, EAX=0Fh, ECX=1 */
1071                 cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx);
1072 
1073                 c->x86_cache_max_rmid  = ecx;
1074                 c->x86_cache_occ_scale = ebx;
1075                 c->x86_cache_mbm_width_offset = eax & 0xff;
1076 
1077                 if (c->x86_vendor == X86_VENDOR_AMD && !c->x86_cache_mbm_width_offset)
1078                         c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_AMD;
1079         }
1080 }
1081 
1082 static int __init resctrl_late_init(void)
1083 {
1084         struct rdt_resource *r;
1085         int state, ret;
1086 
1087         /*
1088          * Initialize functions (or definitions) that differ between
1089          * vendors here.
1090          */
1091         rdt_init_res_defs();
1092 
1093         check_quirks();
1094 
1095         if (!get_rdt_resources())
1096                 return -ENODEV;
1097 
1098         rdt_init_padding();
1099 
1100         state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
1101                                   "x86/resctrl/cat:online:",
1102                                   resctrl_arch_online_cpu,
1103                                   resctrl_arch_offline_cpu);
1104         if (state < 0)
1105                 return state;
1106 
1107         ret = rdtgroup_init();
1108         if (ret) {
1109                 cpuhp_remove_state(state);
1110                 return ret;
1111         }
1112         rdt_online = state;
1113 
1114         for_each_alloc_capable_rdt_resource(r)
1115                 pr_info("%s allocation detected\n", r->name);
1116 
1117         for_each_mon_capable_rdt_resource(r)
1118                 pr_info("%s monitoring detected\n", r->name);
1119 
1120         return 0;
1121 }
1122 
1123 late_initcall(resctrl_late_init);
1124 
1125 static void __exit resctrl_exit(void)
1126 {
1127         struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
1128 
1129         cpuhp_remove_state(rdt_online);
1130 
1131         rdtgroup_exit();
1132 
1133         if (r->mon_capable)
1134                 rdt_put_mon_l3_config();
1135 }
1136 
1137 __exitcall(resctrl_exit);
1138 
