1 /* SPDX-License-Identifier: GPL-2.0 */ 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 2 /* 3 * Copyright (c) 2023 Meta Platforms, Inc. and 3 * Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 4 * Copyright (c) 2023 Tejun Heo <tj@kernel.org 4 * Copyright (c) 2023 Tejun Heo <tj@kernel.org> 5 * Copyright (c) 2023 David Vernet <dvernet@me 5 * Copyright (c) 2023 David Vernet <dvernet@meta.com> 6 */ 6 */ 7 #include <stdio.h> 7 #include <stdio.h> 8 #include <signal.h> 8 #include <signal.h> 9 #include <unistd.h> 9 #include <unistd.h> 10 #include <libgen.h> 10 #include <libgen.h> 11 #include <limits.h> 11 #include <limits.h> 12 #include <inttypes.h> 12 #include <inttypes.h> 13 #include <fcntl.h> 13 #include <fcntl.h> 14 #include <time.h> 14 #include <time.h> 15 #include <bpf/bpf.h> 15 #include <bpf/bpf.h> 16 #include <scx/common.h> 16 #include <scx/common.h> 17 #include "scx_flatcg.h" 17 #include "scx_flatcg.h" 18 #include "scx_flatcg.bpf.skel.h" 18 #include "scx_flatcg.bpf.skel.h" 19 19 20 #ifndef FILEID_KERNFS 20 #ifndef FILEID_KERNFS 21 #define FILEID_KERNFS 0xfe 21 #define FILEID_KERNFS 0xfe 22 #endif 22 #endif 23 23 24 const char help_fmt[] = 24 const char help_fmt[] = 25 "A flattened cgroup hierarchy sched_ext schedu 25 "A flattened cgroup hierarchy sched_ext scheduler.\n" 26 "\n" 26 "\n" 27 "See the top-level comment in .bpf.c for more 27 "See the top-level comment in .bpf.c for more details.\n" 28 "\n" 28 "\n" 29 "Usage: %s [-s SLICE_US] [-i INTERVAL] [-f] [- 29 "Usage: %s [-s SLICE_US] [-i INTERVAL] [-f] [-v]\n" 30 "\n" 30 "\n" 31 " -s SLICE_US Override slice duration\n" 31 " -s SLICE_US Override slice duration\n" 32 " -i INTERVAL Report interval\n" 32 " -i INTERVAL Report interval\n" 33 " -f Use FIFO scheduling instead o 33 " -f Use FIFO scheduling instead of weighted vtime scheduling\n" 34 " -v Print libbpf debug messages\n 34 " -v Print libbpf debug messages\n" 35 " -h Display this help and exit\n" 35 " -h Display this help and exit\n"; 36 36 37 static bool verbose; 37 static bool verbose; 38 static volatile int exit_req; 38 static volatile int exit_req; 39 39 40 static int libbpf_print_fn(enum libbpf_print_l 40 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) 41 { 41 { 42 if (level == LIBBPF_DEBUG && !verbose) 42 if (level == LIBBPF_DEBUG && !verbose) 43 return 0; 43 return 0; 44 return vfprintf(stderr, format, args); 44 return vfprintf(stderr, format, args); 45 } 45 } 46 46 47 static void sigint_handler(int dummy) 47 static void sigint_handler(int dummy) 48 { 48 { 49 exit_req = 1; 49 exit_req = 1; 50 } 50 } 51 51 52 static float read_cpu_util(__u64 *last_sum, __ 52 static float read_cpu_util(__u64 *last_sum, __u64 *last_idle) 53 { 53 { 54 FILE *fp; 54 FILE *fp; 55 char buf[4096]; 55 char buf[4096]; 56 char *line, *cur = NULL, *tok; 56 char *line, *cur = NULL, *tok; 57 __u64 sum = 0, idle = 0; 57 __u64 sum = 0, idle = 0; 58 __u64 delta_sum, delta_idle; 58 __u64 delta_sum, delta_idle; 59 int idx; 59 int idx; 60 60 61 fp = fopen("/proc/stat", "r"); 61 fp = fopen("/proc/stat", "r"); 62 if (!fp) { 62 if (!fp) { 63 perror("fopen(\"/proc/stat\")" 63 perror("fopen(\"/proc/stat\")"); 64 return 0.0; 64 return 0.0; 65 } 65 } 66 66 67 if (!fgets(buf, sizeof(buf), fp)) { 67 if (!fgets(buf, sizeof(buf), fp)) { 68 perror("fgets(\"/proc/stat\")" 68 perror("fgets(\"/proc/stat\")"); 69 fclose(fp); 69 fclose(fp); 70 return 0.0; 70 return 0.0; 71 } 71 } 72 fclose(fp); 72 fclose(fp); 73 73 74 line = buf; 74 line = buf; 75 for (idx = 0; (tok = strtok_r(line, " 75 for (idx = 0; (tok = strtok_r(line, " \n", &cur)); idx++) { 76 char *endp = NULL; 76 char *endp = NULL; 77 __u64 v; 77 __u64 v; 78 78 79 if (idx == 0) { 79 if (idx == 0) { 80 line = NULL; 80 line = NULL; 81 continue; 81 continue; 82 } 82 } 83 v = strtoull(tok, &endp, 0); 83 v = strtoull(tok, &endp, 0); 84 if (!endp || *endp != '\0') { 84 if (!endp || *endp != '\0') { 85 fprintf(stderr, "faile 85 fprintf(stderr, "failed to parse %dth field of /proc/stat (\"%s\")\n", 86 idx, tok); 86 idx, tok); 87 continue; 87 continue; 88 } 88 } 89 sum += v; 89 sum += v; 90 if (idx == 4) 90 if (idx == 4) 91 idle = v; 91 idle = v; 92 } 92 } 93 93 94 delta_sum = sum - *last_sum; 94 delta_sum = sum - *last_sum; 95 delta_idle = idle - *last_idle; 95 delta_idle = idle - *last_idle; 96 *last_sum = sum; 96 *last_sum = sum; 97 *last_idle = idle; 97 *last_idle = idle; 98 98 99 return delta_sum ? (float)(delta_sum - 99 return delta_sum ? (float)(delta_sum - delta_idle) / delta_sum : 0.0; 100 } 100 } 101 101 102 static void fcg_read_stats(struct scx_flatcg * 102 static void fcg_read_stats(struct scx_flatcg *skel, __u64 *stats) 103 { 103 { 104 __u64 cnts[FCG_NR_STATS][skel->rodata- 104 __u64 cnts[FCG_NR_STATS][skel->rodata->nr_cpus]; 105 __u32 idx; 105 __u32 idx; 106 106 107 memset(stats, 0, sizeof(stats[0]) * FC 107 memset(stats, 0, sizeof(stats[0]) * FCG_NR_STATS); 108 108 109 for (idx = 0; idx < FCG_NR_STATS; idx+ 109 for (idx = 0; idx < FCG_NR_STATS; idx++) { 110 int ret, cpu; 110 int ret, cpu; 111 111 112 ret = bpf_map_lookup_elem(bpf_ 112 ret = bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats), 113 &idx 113 &idx, cnts[idx]); 114 if (ret < 0) 114 if (ret < 0) 115 continue; 115 continue; 116 for (cpu = 0; cpu < skel->roda 116 for (cpu = 0; cpu < skel->rodata->nr_cpus; cpu++) 117 stats[idx] += cnts[idx 117 stats[idx] += cnts[idx][cpu]; 118 } 118 } 119 } 119 } 120 120 121 int main(int argc, char **argv) 121 int main(int argc, char **argv) 122 { 122 { 123 struct scx_flatcg *skel; 123 struct scx_flatcg *skel; 124 struct bpf_link *link; 124 struct bpf_link *link; 125 struct timespec intv_ts = { .tv_sec = 125 struct timespec intv_ts = { .tv_sec = 2, .tv_nsec = 0 }; 126 bool dump_cgrps = false; 126 bool dump_cgrps = false; 127 __u64 last_cpu_sum = 0, last_cpu_idle 127 __u64 last_cpu_sum = 0, last_cpu_idle = 0; 128 __u64 last_stats[FCG_NR_STATS] = {}; 128 __u64 last_stats[FCG_NR_STATS] = {}; 129 unsigned long seq = 0; 129 unsigned long seq = 0; 130 __s32 opt; 130 __s32 opt; 131 __u64 ecode; 131 __u64 ecode; 132 132 133 libbpf_set_print(libbpf_print_fn); 133 libbpf_set_print(libbpf_print_fn); 134 signal(SIGINT, sigint_handler); 134 signal(SIGINT, sigint_handler); 135 signal(SIGTERM, sigint_handler); 135 signal(SIGTERM, sigint_handler); 136 restart: 136 restart: 137 skel = SCX_OPS_OPEN(flatcg_ops, scx_fl 137 skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg); 138 138 139 skel->rodata->nr_cpus = libbpf_num_pos 139 skel->rodata->nr_cpus = libbpf_num_possible_cpus(); 140 140 141 while ((opt = getopt(argc, argv, "s:i: 141 while ((opt = getopt(argc, argv, "s:i:dfvh")) != -1) { 142 double v; 142 double v; 143 143 144 switch (opt) { 144 switch (opt) { 145 case 's': 145 case 's': 146 v = strtod(optarg, NUL 146 v = strtod(optarg, NULL); 147 skel->rodata->cgrp_sli 147 skel->rodata->cgrp_slice_ns = v * 1000; 148 break; 148 break; 149 case 'i': 149 case 'i': 150 v = strtod(optarg, NUL 150 v = strtod(optarg, NULL); 151 intv_ts.tv_sec = v; 151 intv_ts.tv_sec = v; 152 intv_ts.tv_nsec = (v - 152 intv_ts.tv_nsec = (v - (float)intv_ts.tv_sec) * 1000000000; 153 break; 153 break; 154 case 'd': 154 case 'd': 155 dump_cgrps = true; 155 dump_cgrps = true; 156 break; 156 break; 157 case 'f': 157 case 'f': 158 skel->rodata->fifo_sch 158 skel->rodata->fifo_sched = true; 159 break; 159 break; 160 case 'v': 160 case 'v': 161 verbose = true; 161 verbose = true; 162 break; 162 break; 163 case 'h': 163 case 'h': 164 default: 164 default: 165 fprintf(stderr, help_f 165 fprintf(stderr, help_fmt, basename(argv[0])); 166 return opt != 'h'; 166 return opt != 'h'; 167 } 167 } 168 } 168 } 169 169 170 printf("slice=%.1lfms intv=%.1lfs dump 170 printf("slice=%.1lfms intv=%.1lfs dump_cgrps=%d", 171 (double)skel->rodata->cgrp_slic 171 (double)skel->rodata->cgrp_slice_ns / 1000000.0, 172 (double)intv_ts.tv_sec + (doubl 172 (double)intv_ts.tv_sec + (double)intv_ts.tv_nsec / 1000000000.0, 173 dump_cgrps); 173 dump_cgrps); 174 174 175 SCX_OPS_LOAD(skel, flatcg_ops, scx_fla 175 SCX_OPS_LOAD(skel, flatcg_ops, scx_flatcg, uei); 176 link = SCX_OPS_ATTACH(skel, flatcg_ops 176 link = SCX_OPS_ATTACH(skel, flatcg_ops, scx_flatcg); 177 177 178 while (!exit_req && !UEI_EXITED(skel, 178 while (!exit_req && !UEI_EXITED(skel, uei)) { 179 __u64 acc_stats[FCG_NR_STATS]; 179 __u64 acc_stats[FCG_NR_STATS]; 180 __u64 stats[FCG_NR_STATS]; 180 __u64 stats[FCG_NR_STATS]; 181 float cpu_util; 181 float cpu_util; 182 int i; 182 int i; 183 183 184 cpu_util = read_cpu_util(&last 184 cpu_util = read_cpu_util(&last_cpu_sum, &last_cpu_idle); 185 185 186 fcg_read_stats(skel, acc_stats 186 fcg_read_stats(skel, acc_stats); 187 for (i = 0; i < FCG_NR_STATS; 187 for (i = 0; i < FCG_NR_STATS; i++) 188 stats[i] = acc_stats[i 188 stats[i] = acc_stats[i] - last_stats[i]; 189 189 190 memcpy(last_stats, acc_stats, 190 memcpy(last_stats, acc_stats, sizeof(acc_stats)); 191 191 192 printf("\n[SEQ %6lu cpu=%5.1lf 192 printf("\n[SEQ %6lu cpu=%5.1lf hweight_gen=%" PRIu64 "]\n", 193 seq++, cpu_util * 100.0 193 seq++, cpu_util * 100.0, skel->data->hweight_gen); 194 printf(" act:%6llu deac 194 printf(" act:%6llu deact:%6llu global:%6llu local:%6llu\n", 195 stats[FCG_STAT_ACT], 195 stats[FCG_STAT_ACT], 196 stats[FCG_STAT_DEACT], 196 stats[FCG_STAT_DEACT], 197 stats[FCG_STAT_GLOBAL], 197 stats[FCG_STAT_GLOBAL], 198 stats[FCG_STAT_LOCAL]); 198 stats[FCG_STAT_LOCAL]); 199 printf("HWT cache:%6llu updat 199 printf("HWT cache:%6llu update:%6llu skip:%6llu race:%6llu\n", 200 stats[FCG_STAT_HWT_CACH 200 stats[FCG_STAT_HWT_CACHE], 201 stats[FCG_STAT_HWT_UPDA 201 stats[FCG_STAT_HWT_UPDATES], 202 stats[FCG_STAT_HWT_SKIP 202 stats[FCG_STAT_HWT_SKIP], 203 stats[FCG_STAT_HWT_RACE 203 stats[FCG_STAT_HWT_RACE]); 204 printf("ENQ skip:%6llu rac 204 printf("ENQ skip:%6llu race:%6llu\n", 205 stats[FCG_STAT_ENQ_SKIP 205 stats[FCG_STAT_ENQ_SKIP], 206 stats[FCG_STAT_ENQ_RACE 206 stats[FCG_STAT_ENQ_RACE]); 207 printf("CNS keep:%6llu expir 207 printf("CNS keep:%6llu expire:%6llu empty:%6llu gone:%6llu\n", 208 stats[FCG_STAT_CNS_KEEP 208 stats[FCG_STAT_CNS_KEEP], 209 stats[FCG_STAT_CNS_EXPI 209 stats[FCG_STAT_CNS_EXPIRE], 210 stats[FCG_STAT_CNS_EMPT 210 stats[FCG_STAT_CNS_EMPTY], 211 stats[FCG_STAT_CNS_GONE 211 stats[FCG_STAT_CNS_GONE]); 212 printf("PNC next:%6llu empt 212 printf("PNC next:%6llu empty:%6llu nocgrp:%6llu gone:%6llu race:%6llu fail:%6llu\n", 213 stats[FCG_STAT_PNC_NEXT 213 stats[FCG_STAT_PNC_NEXT], 214 stats[FCG_STAT_PNC_EMPT 214 stats[FCG_STAT_PNC_EMPTY], 215 stats[FCG_STAT_PNC_NO_C 215 stats[FCG_STAT_PNC_NO_CGRP], 216 stats[FCG_STAT_PNC_GONE 216 stats[FCG_STAT_PNC_GONE], 217 stats[FCG_STAT_PNC_RACE 217 stats[FCG_STAT_PNC_RACE], 218 stats[FCG_STAT_PNC_FAIL 218 stats[FCG_STAT_PNC_FAIL]); 219 printf("BAD remove:%6llu\n", 219 printf("BAD remove:%6llu\n", 220 acc_stats[FCG_STAT_BAD_ 220 acc_stats[FCG_STAT_BAD_REMOVAL]); 221 fflush(stdout); 221 fflush(stdout); 222 222 223 nanosleep(&intv_ts, NULL); 223 nanosleep(&intv_ts, NULL); 224 } 224 } 225 225 226 bpf_link__destroy(link); 226 bpf_link__destroy(link); 227 ecode = UEI_REPORT(skel, uei); 227 ecode = UEI_REPORT(skel, uei); 228 scx_flatcg__destroy(skel); 228 scx_flatcg__destroy(skel); 229 229 230 if (UEI_ECODE_RESTART(ecode)) 230 if (UEI_ECODE_RESTART(ecode)) 231 goto restart; 231 goto restart; 232 return 0; 232 return 0; 233 } 233 } 234 234
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.