~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/samples/bpf/cpustat_kern.c

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0
  2 
  3 #include <linux/version.h>
  4 #include <linux/ptrace.h>
  5 #include <uapi/linux/bpf.h>
  6 #include <bpf/bpf_helpers.h>
  7 
  8 /*
  9  * The CPU number, cstate number and pstate number are based
 10  * on 96boards Hikey with octa CA53 CPUs.
 11  *
 12  * Every CPU has three idle states for cstate:
 13  *   WFI, CPU_OFF, CLUSTER_OFF
 14  *
 15  * Every CPU has 5 operating points:
 16  *   208MHz, 432MHz, 729MHz, 960MHz, 1200MHz
 17  *
 18  * This code is based on these assumptions; other platforms
 19  * need to adjust these definitions.
 20  */
 21 #define MAX_CPU                 8
 22 #define MAX_PSTATE_ENTRIES      5
 23 #define MAX_CSTATE_ENTRIES      3
 24 
 25 static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 };
 26 
 27 /*
 28  * The my_map structure records the cstate and pstate index and
 29  * timestamp (Idx, Ts). When a new event arrives, the entry is updated
 30  * with the new state index and timestamp (Idx`, Ts`).
 31  *
 32  * Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time
 33  * interval for the previous state: Duration(Idx) = Ts` - Ts.
 34  *
 35  * Every CPU has one array as below for recording the state index and
 36  * timestamp; cstate and pstate are recorded separately:
 37  *
 38  * +--------------------------+
 39  * | cstate timestamp         |
 40  * +--------------------------+
 41  * | cstate index             |
 42  * +--------------------------+
 43  * | pstate timestamp         |
 44  * +--------------------------+
 45  * | pstate index             |
 46  * +--------------------------+
 47  */
 48 #define MAP_OFF_CSTATE_TIME     0
 49 #define MAP_OFF_CSTATE_IDX      1
 50 #define MAP_OFF_PSTATE_TIME     2
 51 #define MAP_OFF_PSTATE_IDX      3
 52 #define MAP_OFF_NUM             4
 53 
 54 struct {
 55         __uint(type, BPF_MAP_TYPE_ARRAY);
 56         __type(key, u32);
 57         __type(value, u64);
 58         __uint(max_entries, MAX_CPU * MAP_OFF_NUM);
 59 } my_map SEC(".maps");
 60 
 61 /* cstate_duration records duration time for every idle state per CPU */
 62 struct {
 63         __uint(type, BPF_MAP_TYPE_ARRAY);
 64         __type(key, u32);
 65         __type(value, u64);
 66         __uint(max_entries, MAX_CPU * MAX_CSTATE_ENTRIES);
 67 } cstate_duration SEC(".maps");
 68 
 69 /* pstate_duration records duration time for every operating point per CPU */
 70 struct {
 71         __uint(type, BPF_MAP_TYPE_ARRAY);
 72         __type(key, u32);
 73         __type(value, u64);
 74         __uint(max_entries, MAX_CPU * MAX_PSTATE_ENTRIES);
 75 } pstate_duration SEC(".maps");
 76 
 77 /*
 78  * The trace events for cpu_idle and cpu_frequency are taken from:
 79  * /sys/kernel/tracing/events/power/cpu_idle/format
 80  * /sys/kernel/tracing/events/power/cpu_frequency/format
 81  *
 82  * These two events have same format, so define one common structure.
 83  */
 84 struct cpu_args {
 85         u64 pad;
 86         u32 state;
 87         u32 cpu_id;
 88 };
 89 
 90 /* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */
 91 static u32 find_cpu_pstate_idx(u32 frequency)
 92 {
 93         u32 i;
 94 
 95         for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) {
 96                 if (frequency == cpu_opps[i])
 97                         return i;
 98         }
 99 
100         return i;
101 }
102 
103 SEC("tracepoint/power/cpu_idle")
104 int bpf_prog1(struct cpu_args *ctx)
105 {
106         u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta;
107         u32 key, cpu, pstate_idx;
108         u64 *val;
109 
110         if (ctx->cpu_id > MAX_CPU)
111                 return 0;
112 
113         cpu = ctx->cpu_id;
114 
115         key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME;
116         cts = bpf_map_lookup_elem(&my_map, &key);
117         if (!cts)
118                 return 0;
119 
120         key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
121         cstate = bpf_map_lookup_elem(&my_map, &key);
122         if (!cstate)
123                 return 0;
124 
125         key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
126         pts = bpf_map_lookup_elem(&my_map, &key);
127         if (!pts)
128                 return 0;
129 
130         key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
131         pstate = bpf_map_lookup_elem(&my_map, &key);
132         if (!pstate)
133                 return 0;
134 
135         prev_state = *cstate;
136         *cstate = ctx->state;
137 
138         if (!*cts) {
139                 *cts = bpf_ktime_get_ns();
140                 return 0;
141         }
142 
143         cur_ts = bpf_ktime_get_ns();
144         delta = cur_ts - *cts;
145         *cts = cur_ts;
146 
147         /*
148          * When state doesn't equal to (u32)-1, the cpu will enter
149          * one idle state; for this case we need to record interval
150          * for the pstate.
151          *
152          *                 OPP2
153          *            +---------------------+
154          *     OPP1   |                     |
155          *   ---------+                     |
156          *                                  |  Idle state
157          *                                  +---------------
158          *
159          *            |<- pstate duration ->|
160          *            ^                     ^
161          *           pts                  cur_ts
162          */
163         if (ctx->state != (u32)-1) {
164 
165                 /* record pstate after have first cpu_frequency event */
166                 if (!*pts)
167                         return 0;
168 
169                 delta = cur_ts - *pts;
170 
171                 pstate_idx = find_cpu_pstate_idx(*pstate);
172                 if (pstate_idx >= MAX_PSTATE_ENTRIES)
173                         return 0;
174 
175                 key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
176                 val = bpf_map_lookup_elem(&pstate_duration, &key);
177                 if (val)
178                         __sync_fetch_and_add((long *)val, delta);
179 
180         /*
181          * When state equal to (u32)-1, the cpu just exits from one
182          * specific idle state; for this case we need to record
183          * interval for the pstate.
184          *
185          *       OPP2
186          *   -----------+
187          *              |                          OPP1
188          *              |                     +-----------
189          *              |     Idle state      |
190          *              +---------------------+
191          *
192          *              |<- cstate duration ->|
193          *              ^                     ^
194          *             cts                  cur_ts
195          */
196         } else {
197 
198                 key = cpu * MAX_CSTATE_ENTRIES + prev_state;
199                 val = bpf_map_lookup_elem(&cstate_duration, &key);
200                 if (val)
201                         __sync_fetch_and_add((long *)val, delta);
202         }
203 
204         /* Update timestamp for pstate as new start time */
205         if (*pts)
206                 *pts = cur_ts;
207 
208         return 0;
209 }
210 
211 SEC("tracepoint/power/cpu_frequency")
212 int bpf_prog2(struct cpu_args *ctx)
213 {
214         u64 *pts, *cstate, *pstate, cur_ts, delta;
215         u32 key, cpu, pstate_idx;
216         u64 *val;
217 
218         cpu = ctx->cpu_id;
219 
220         key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
221         pts = bpf_map_lookup_elem(&my_map, &key);
222         if (!pts)
223                 return 0;
224 
225         key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
226         pstate = bpf_map_lookup_elem(&my_map, &key);
227         if (!pstate)
228                 return 0;
229 
230         key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
231         cstate = bpf_map_lookup_elem(&my_map, &key);
232         if (!cstate)
233                 return 0;
234 
235         *pstate = ctx->state;
236 
237         if (!*pts) {
238                 *pts = bpf_ktime_get_ns();
239                 return 0;
240         }
241 
242         cur_ts = bpf_ktime_get_ns();
243         delta = cur_ts - *pts;
244         *pts = cur_ts;
245 
246         /* When CPU is in idle, bail out to skip pstate statistics */
247         if (*cstate != (u32)(-1))
248                 return 0;
249 
250         /*
251          * The cpu changes to another different OPP (in below diagram
252          * change frequency from OPP3 to OPP1), need recording interval
253          * for previous frequency OPP3 and update timestamp as start
254          * time for new frequency OPP1.
255          *
256          *                 OPP3
257          *            +---------------------+
258          *     OPP2   |                     |
259          *   ---------+                     |
260          *                                  |    OPP1
261          *                                  +---------------
262          *
263          *            |<- pstate duration ->|
264          *            ^                     ^
265          *           pts                  cur_ts
266          */
267         pstate_idx = find_cpu_pstate_idx(*pstate);
268         if (pstate_idx >= MAX_PSTATE_ENTRIES)
269                 return 0;
270 
271         key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
272         val = bpf_map_lookup_elem(&pstate_duration, &key);
273         if (val)
274                 __sync_fetch_and_add((long *)val, delta);
275 
276         return 0;
277 }
278 
279 char _license[] SEC("license") = "GPL";
280 u32 _version SEC("version") = LINUX_VERSION_CODE;
281 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php