1 #!/usr/bin/env python3 1 #!/usr/bin/env python3 2 # SPDX-License-Identifier: GPL-2.0 2 # SPDX-License-Identifier: GPL-2.0 3 # 3 # 4 # Run a perf script command multiple times in 4 # Run a perf script command multiple times in parallel, using perf script 5 # options --cpu and --time so that each job pr 5 # options --cpu and --time so that each job processes a different chunk 6 # of the data. 6 # of the data. 7 # 7 # 8 # Copyright (c) 2024, Intel Corporation. 8 # Copyright (c) 2024, Intel Corporation. 9 9 10 import subprocess 10 import subprocess 11 import argparse 11 import argparse 12 import pathlib 12 import pathlib 13 import shlex 13 import shlex 14 import time 14 import time 15 import copy 15 import copy 16 import sys 16 import sys 17 import os 17 import os 18 import re 18 import re 19 19 20 glb_prog_name = "parallel-perf.py" 20 glb_prog_name = "parallel-perf.py" 21 glb_min_interval = 10.0 21 glb_min_interval = 10.0 22 glb_min_samples = 64 22 glb_min_samples = 64 23 23 24 class Verbosity(): 24 class Verbosity(): 25 25 26 def __init__(self, quiet=False, verbos 26 def __init__(self, quiet=False, verbose=False, debug=False): 27 self.normal = True 27 self.normal = True 28 self.verbose = verbose 28 self.verbose = verbose 29 self.debug = debug 29 self.debug = debug 30 self.self_test = True 30 self.self_test = True 31 if self.debug: 31 if self.debug: 32 self.verbose = True 32 self.verbose = True 33 if self.verbose: 33 if self.verbose: 34 quiet = False 34 quiet = False 35 if quiet: 35 if quiet: 36 self.normal = False 36 self.normal = False 37 37 38 # Manage work (Start/Wait/Kill), as represente 38 # Manage work (Start/Wait/Kill), as represented by a subprocess.Popen command 39 class Work(): 39 class Work(): 40 40 41 def __init__(self, cmd, pipe_to, outpu 41 def __init__(self, cmd, pipe_to, output_dir="."): 42 self.popen = None 42 self.popen = None 43 self.consumer = None 43 self.consumer = None 44 self.cmd = cmd 44 self.cmd = cmd 45 self.pipe_to = pipe_to 45 self.pipe_to = pipe_to 46 self.output_dir = output_dir 46 self.output_dir = output_dir 47 self.cmdout_name = f"{output_d 47 self.cmdout_name = f"{output_dir}/cmd.txt" 48 self.stdout_name = f"{output_d 48 self.stdout_name = f"{output_dir}/out.txt" 49 self.stderr_name = f"{output_d 49 self.stderr_name = f"{output_dir}/err.txt" 50 50 51 def Command(self): 51 def Command(self): 52 sh_cmd = [ shlex.quote(x) for 52 sh_cmd = [ shlex.quote(x) for x in self.cmd ] 53 return " ".join(self.cmd) 53 return " ".join(self.cmd) 54 54 55 def Stdout(self): 55 def Stdout(self): 56 return open(self.stdout_name, 56 return open(self.stdout_name, "w") 57 57 58 def Stderr(self): 58 def Stderr(self): 59 return open(self.stderr_name, 59 return open(self.stderr_name, "w") 60 60 61 def CreateOutputDir(self): 61 def CreateOutputDir(self): 62 pathlib.Path(self.output_dir). 62 pathlib.Path(self.output_dir).mkdir(parents=True, exist_ok=True) 63 63 64 def Start(self): 64 def Start(self): 65 if self.popen: 65 if self.popen: 66 return 66 return 67 self.CreateOutputDir() 67 self.CreateOutputDir() 68 with open(self.cmdout_name, "w 68 with open(self.cmdout_name, "w") as f: 69 f.write(self.Command() 69 f.write(self.Command()) 70 f.write("\n") 70 f.write("\n") 71 stdout = self.Stdout() 71 stdout = self.Stdout() 72 stderr = self.Stderr() 72 stderr = self.Stderr() 73 if self.pipe_to: 73 if self.pipe_to: 74 self.popen = subproces 74 self.popen = subprocess.Popen(self.cmd, stdout=subprocess.PIPE, stderr=stderr) 75 args = shlex.split(sel 75 args = shlex.split(self.pipe_to) 76 self.consumer = subpro 76 self.consumer = subprocess.Popen(args, stdin=self.popen.stdout, stdout=stdout, stderr=stderr) 77 else: 77 else: 78 self.popen = subproces 78 self.popen = subprocess.Popen(self.cmd, stdout=stdout, stderr=stderr) 79 79 80 def RemoveEmptyErrFile(self): 80 def RemoveEmptyErrFile(self): 81 if os.path.exists(self.stderr_ 81 if os.path.exists(self.stderr_name): 82 if os.path.getsize(sel 82 if os.path.getsize(self.stderr_name) == 0: 83 os.unlink(self 83 os.unlink(self.stderr_name) 84 84 85 def Errors(self): 85 def Errors(self): 86 if os.path.exists(self.stderr_ 86 if os.path.exists(self.stderr_name): 87 if os.path.getsize(sel 87 if os.path.getsize(self.stderr_name) != 0: 88 return [ f"Non 88 return [ f"Non-empty error file {self.stderr_name}" ] 89 return [] 89 return [] 90 90 91 def TidyUp(self): 91 def TidyUp(self): 92 self.RemoveEmptyErrFile() 92 self.RemoveEmptyErrFile() 93 93 94 def RawPollWait(self, p, wait): 94 def RawPollWait(self, p, wait): 95 if wait: 95 if wait: 96 return p.wait() 96 return p.wait() 97 return p.poll() 97 return p.poll() 98 98 99 def Poll(self, wait=False): 99 def Poll(self, wait=False): 100 if not self.popen: 100 if not self.popen: 101 return None 101 return None 102 result = self.RawPollWait(self 102 result = self.RawPollWait(self.popen, wait) 103 if self.consumer: 103 if self.consumer: 104 res = result 104 res = result 105 result = self.RawPollW 105 result = self.RawPollWait(self.consumer, wait) 106 if result != None and 106 if result != None and res == None: 107 self.popen.kil 107 self.popen.kill() 108 result = None 108 result = None 109 elif result == 0 and r 109 elif result == 0 and res != None and res != 0: 110 result = res 110 result = res 111 if result != None: 111 if result != None: 112 self.TidyUp() 112 self.TidyUp() 113 return result 113 return result 114 114 115 def Wait(self): 115 def Wait(self): 116 return self.Poll(wait=True) 116 return self.Poll(wait=True) 117 117 118 def Kill(self): 118 def Kill(self): 119 if not self.popen: 119 if not self.popen: 120 return 120 return 121 self.popen.kill() 121 self.popen.kill() 122 if self.consumer: 122 if self.consumer: 123 self.consumer.kill() 123 self.consumer.kill() 124 124 125 def KillWork(worklist, verbosity): 125 def KillWork(worklist, verbosity): 126 for w in worklist: 126 for w in worklist: 127 w.Kill() 127 w.Kill() 128 for w in worklist: 128 for w in worklist: 129 w.Wait() 129 w.Wait() 130 130 131 def NumberOfCPUs(): 131 def NumberOfCPUs(): 132 return os.sysconf("SC_NPROCESSORS_ONLN 132 return os.sysconf("SC_NPROCESSORS_ONLN") 133 133 134 def NanoSecsToSecsStr(x): 134 def NanoSecsToSecsStr(x): 135 if x == None: 135 if x == None: 136 return "" 136 return "" 137 x = str(x) 137 x = str(x) 138 if len(x) < 10: 138 if len(x) < 10: 139 x = "0" * (10 - len(x)) + x 139 x = "0" * (10 - len(x)) + x 140 return x[:len(x) - 9] + "." + x[-9:] 140 return x[:len(x) - 9] + "." + x[-9:] 141 141 142 def InsertOptionAfter(cmd, option, after): 142 def InsertOptionAfter(cmd, option, after): 143 try: 143 try: 144 pos = cmd.index(after) 144 pos = cmd.index(after) 145 cmd.insert(pos + 1, option) 145 cmd.insert(pos + 1, option) 146 except: 146 except: 147 cmd.append(option) 147 cmd.append(option) 148 148 149 def CreateWorkList(cmd, pipe_to, output_dir, c 149 def CreateWorkList(cmd, pipe_to, output_dir, cpus, time_ranges_by_cpu): 150 max_len = len(str(cpus[-1])) 150 max_len = len(str(cpus[-1])) 151 cpu_dir_fmt = f"cpu-%.{max_len}u" 151 cpu_dir_fmt = f"cpu-%.{max_len}u" 152 worklist = [] 152 worklist = [] 153 pos = 0 153 pos = 0 154 for cpu in cpus: 154 for cpu in cpus: 155 if cpu >= 0: 155 if cpu >= 0: 156 cpu_dir = os.path.join 156 cpu_dir = os.path.join(output_dir, cpu_dir_fmt % cpu) 157 cpu_option = f"--cpu={ 157 cpu_option = f"--cpu={cpu}" 158 else: 158 else: 159 cpu_dir = output_dir 159 cpu_dir = output_dir 160 cpu_option = None 160 cpu_option = None 161 161 162 tr_dir_fmt = "time-range" 162 tr_dir_fmt = "time-range" 163 163 164 if len(time_ranges_by_cpu) > 1 164 if len(time_ranges_by_cpu) > 1: 165 time_ranges = time_ran 165 time_ranges = time_ranges_by_cpu[pos] 166 tr_dir_fmt += f"-{pos} 166 tr_dir_fmt += f"-{pos}" 167 pos += 1 167 pos += 1 168 else: 168 else: 169 time_ranges = time_ran 169 time_ranges = time_ranges_by_cpu[0] 170 170 171 max_len = len(str(len(time_ran 171 max_len = len(str(len(time_ranges))) 172 tr_dir_fmt += f"-%.{max_len}u" 172 tr_dir_fmt += f"-%.{max_len}u" 173 173 174 i = 0 174 i = 0 175 for r in time_ranges: 175 for r in time_ranges: 176 if r == [None, None]: 176 if r == [None, None]: 177 time_option = 177 time_option = None 178 work_output_di 178 work_output_dir = cpu_dir 179 else: 179 else: 180 time_option = 180 time_option = "--time=" + NanoSecsToSecsStr(r[0]) + "," + NanoSecsToSecsStr(r[1]) 181 work_output_di 181 work_output_dir = os.path.join(cpu_dir, tr_dir_fmt % i) 182 i += 1 182 i += 1 183 work_cmd = list(cmd) 183 work_cmd = list(cmd) 184 if time_option != None 184 if time_option != None: 185 InsertOptionAf 185 InsertOptionAfter(work_cmd, time_option, "script") 186 if cpu_option != None: 186 if cpu_option != None: 187 InsertOptionAf 187 InsertOptionAfter(work_cmd, cpu_option, "script") 188 w = Work(work_cmd, pip 188 w = Work(work_cmd, pipe_to, work_output_dir) 189 worklist.append(w) 189 worklist.append(w) 190 return worklist 190 return worklist 191 191 192 def DoRunWork(worklist, nr_jobs, verbosity): 192 def DoRunWork(worklist, nr_jobs, verbosity): 193 nr_to_do = len(worklist) 193 nr_to_do = len(worklist) 194 not_started = list(worklist) 194 not_started = list(worklist) 195 running = [] 195 running = [] 196 done = [] 196 done = [] 197 chg = False 197 chg = False 198 while True: 198 while True: 199 nr_done = len(done) 199 nr_done = len(done) 200 if chg and verbosity.normal: 200 if chg and verbosity.normal: 201 nr_run = len(running) 201 nr_run = len(running) 202 print(f"\rThere are {n 202 print(f"\rThere are {nr_to_do} jobs: {nr_done} completed, {nr_run} running", flush=True, end=" ") 203 if verbosity.verbose: 203 if verbosity.verbose: 204 print() 204 print() 205 chg = False 205 chg = False 206 if nr_done == nr_to_do: 206 if nr_done == nr_to_do: 207 break 207 break 208 while len(running) < nr_jobs a 208 while len(running) < nr_jobs and len(not_started): 209 w = not_started.pop(0) 209 w = not_started.pop(0) 210 running.append(w) 210 running.append(w) 211 if verbosity.verbose: 211 if verbosity.verbose: 212 print("Startin 212 print("Starting:", w.Command()) 213 w.Start() 213 w.Start() 214 chg = True 214 chg = True 215 if len(running): 215 if len(running): 216 time.sleep(0.1) 216 time.sleep(0.1) 217 finished = [] 217 finished = [] 218 not_finished = [] 218 not_finished = [] 219 while len(running): 219 while len(running): 220 w = running.pop(0) 220 w = running.pop(0) 221 r = w.Poll() 221 r = w.Poll() 222 if r == None: 222 if r == None: 223 not_finished.a 223 not_finished.append(w) 224 continue 224 continue 225 if r == 0: 225 if r == 0: 226 if verbosity.v 226 if verbosity.verbose: 227 print( 227 print("Finished:", w.Command()) 228 finished.appen 228 finished.append(w) 229 chg = True 229 chg = True 230 continue 230 continue 231 if verbosity.normal an 231 if verbosity.normal and not verbosity.verbose: 232 print() 232 print() 233 print("Job failed!\n 233 print("Job failed!\n return code:", r, "\n command: ", w.Command()) 234 if w.pipe_to: 234 if w.pipe_to: 235 print(" pip 235 print(" piped to: ", w.pipe_to) 236 print("Killing outstan 236 print("Killing outstanding jobs") 237 KillWork(not_finished, 237 KillWork(not_finished, verbosity) 238 KillWork(running, verb 238 KillWork(running, verbosity) 239 return False 239 return False 240 running = not_finished 240 running = not_finished 241 done += finished 241 done += finished 242 errorlist = [] 242 errorlist = [] 243 for w in worklist: 243 for w in worklist: 244 errorlist += w.Errors() 244 errorlist += w.Errors() 245 if len(errorlist): 245 if len(errorlist): 246 print("Errors:") 246 print("Errors:") 247 for e in errorlist: 247 for e in errorlist: 248 print(e) 248 print(e) 249 elif verbosity.normal: 249 elif verbosity.normal: 250 print("\r"," "*50, "\rAll jobs 250 print("\r"," "*50, "\rAll jobs finished successfully", flush=True) 251 return True 251 return True 252 252 253 def RunWork(worklist, nr_jobs=NumberOfCPUs(), 253 def RunWork(worklist, nr_jobs=NumberOfCPUs(), verbosity=Verbosity()): 254 try: 254 try: 255 return DoRunWork(worklist, nr_ 255 return DoRunWork(worklist, nr_jobs, verbosity) 256 except: 256 except: 257 for w in worklist: 257 for w in worklist: 258 w.Kill() 258 w.Kill() 259 raise 259 raise 260 return True 260 return True 261 261 262 def ReadHeader(perf, file_name): 262 def ReadHeader(perf, file_name): 263 return subprocess.Popen([perf, "script 263 return subprocess.Popen([perf, "script", "--header-only", "--input", file_name], stdout=subprocess.PIPE).stdout.read().decode("utf-8") 264 264 265 def ParseHeader(hdr): 265 def ParseHeader(hdr): 266 result = {} 266 result = {} 267 lines = hdr.split("\n") 267 lines = hdr.split("\n") 268 for line in lines: 268 for line in lines: 269 if ":" in line and line[0] == 269 if ":" in line and line[0] == "#": 270 pos = line.index(":") 270 pos = line.index(":") 271 name = line[1:pos-1].s 271 name = line[1:pos-1].strip() 272 value = line[pos+1:].s 272 value = line[pos+1:].strip() 273 if name in result: 273 if name in result: 274 orig_name = na 274 orig_name = name 275 nr = 2 275 nr = 2 276 while True: 276 while True: 277 name = 277 name = f"{orig_name} {nr}" 278 if nam 278 if name not in result: 279 279 break 280 nr += 280 nr += 1 281 result[name] = value 281 result[name] = value 282 return result 282 return result 283 283 284 def HeaderField(hdr_dict, hdr_fld): 284 def HeaderField(hdr_dict, hdr_fld): 285 if hdr_fld not in hdr_dict: 285 if hdr_fld not in hdr_dict: 286 raise Exception(f"'{hdr_fld}' 286 raise Exception(f"'{hdr_fld}' missing from header information") 287 return hdr_dict[hdr_fld] 287 return hdr_dict[hdr_fld] 288 288 289 # Represent the position of an option within a 289 # Represent the position of an option within a command string 290 # and provide the option value and/or remove t 290 # and provide the option value and/or remove the option 291 class OptPos(): 291 class OptPos(): 292 292 293 def Init(self, opt_element=-1, value_e 293 def Init(self, opt_element=-1, value_element=-1, opt_pos=-1, value_pos=-1, error=None): 294 self.opt_element = opt_element 294 self.opt_element = opt_element # list element that contains option 295 self.value_element = value_ele 295 self.value_element = value_element # list element that contains option value 296 self.opt_pos = opt_pos 296 self.opt_pos = opt_pos # string position of option 297 self.value_pos = value_pos 297 self.value_pos = value_pos # string position of value 298 self.error = error 298 self.error = error # error message string 299 299 300 def __init__(self, args, short_name, l 300 def __init__(self, args, short_name, long_name, default=None): 301 self.args = list(args) 301 self.args = list(args) 302 self.default = default 302 self.default = default 303 n = 2 + len(long_name) 303 n = 2 + len(long_name) 304 m = len(short_name) 304 m = len(short_name) 305 pos = -1 305 pos = -1 306 for opt in args: 306 for opt in args: 307 pos += 1 307 pos += 1 308 if m and opt[:2] == f" 308 if m and opt[:2] == f"-{short_name}": 309 if len(opt) == 309 if len(opt) == 2: 310 if pos 310 if pos + 1 < len(args): 311 311 self.Init(pos, pos + 1, 0, 0) 312 else: 312 else: 313 313 self.Init(error = f"-{short_name} option missing value") 314 else: 314 else: 315 self.I 315 self.Init(pos, pos, 0, 2) 316 return 316 return 317 if opt[:n] == f"--{lon 317 if opt[:n] == f"--{long_name}": 318 if len(opt) == 318 if len(opt) == n: 319 if pos 319 if pos + 1 < len(args): 320 320 self.Init(pos, pos + 1, 0, 0) 321 else: 321 else: 322 322 self.Init(error = f"--{long_name} option missing value") 323 elif opt[n] == 323 elif opt[n] == "=": 324 self.I 324 self.Init(pos, pos, 0, n + 1) 325 else: 325 else: 326 self.I 326 self.Init(error = f"--{long_name} option expected '='") 327 return 327 return 328 if m and opt[:1] == "- 328 if m and opt[:1] == "-" and opt[:2] != "--" and short_name in opt: 329 ipos = opt.ind 329 ipos = opt.index(short_name) 330 if "-" in opt[ 330 if "-" in opt[1:]: 331 hpos = 331 hpos = opt[1:].index("-") 332 if hpo 332 if hpos < ipos: 333 333 continue 334 if ipos + 1 == 334 if ipos + 1 == len(opt): 335 if pos 335 if pos + 1 < len(args): 336 336 self.Init(pos, pos + 1, ipos, 0) 337 else: 337 else: 338 338 self.Init(error = f"-{short_name} option missing value") 339 else: 339 else: 340 self.I 340 self.Init(pos, pos, ipos, ipos + 1) 341 return 341 return 342 self.Init() 342 self.Init() 343 343 344 def Value(self): 344 def Value(self): 345 if self.opt_element >= 0: 345 if self.opt_element >= 0: 346 if self.opt_element != 346 if self.opt_element != self.value_element: 347 return self.ar 347 return self.args[self.value_element] 348 else: 348 else: 349 return self.ar 349 return self.args[self.value_element][self.value_pos:] 350 return self.default 350 return self.default 351 351 352 def Remove(self, args): 352 def Remove(self, args): 353 if self.opt_element == -1: 353 if self.opt_element == -1: 354 return 354 return 355 if self.opt_element != self.va 355 if self.opt_element != self.value_element: 356 del args[self.value_el 356 del args[self.value_element] 357 if self.opt_pos: 357 if self.opt_pos: 358 args[self.opt_element] 358 args[self.opt_element] = args[self.opt_element][:self.opt_pos] 359 else: 359 else: 360 del args[self.opt_elem 360 del args[self.opt_element] 361 361 362 def DetermineInputFileName(cmd): 362 def DetermineInputFileName(cmd): 363 p = OptPos(cmd, "i", "input", "perf.da 363 p = OptPos(cmd, "i", "input", "perf.data") 364 if p.error: 364 if p.error: 365 raise Exception(f"perf command 365 raise Exception(f"perf command {p.error}") 366 file_name = p.Value() 366 file_name = p.Value() 367 if not os.path.exists(file_name): 367 if not os.path.exists(file_name): 368 raise Exception(f"perf command 368 raise Exception(f"perf command input file '{file_name}' not found") 369 return file_name 369 return file_name 370 370 371 def ReadOption(args, short_name, long_name, er 371 def ReadOption(args, short_name, long_name, err_prefix, remove=False): 372 p = OptPos(args, short_name, long_name 372 p = OptPos(args, short_name, long_name) 373 if p.error: 373 if p.error: 374 raise Exception(f"{err_prefix} 374 raise Exception(f"{err_prefix}{p.error}") 375 value = p.Value() 375 value = p.Value() 376 if remove: 376 if remove: 377 p.Remove(args) 377 p.Remove(args) 378 return value 378 return value 379 379 380 def ExtractOption(args, short_name, long_name, 380 def ExtractOption(args, short_name, long_name, err_prefix): 381 return ReadOption(args, short_name, lo 381 return ReadOption(args, short_name, long_name, err_prefix, True) 382 382 383 def ReadPerfOption(args, short_name, long_name 383 def ReadPerfOption(args, short_name, long_name): 384 return ReadOption(args, short_name, lo 384 return ReadOption(args, short_name, long_name, "perf command ") 385 385 386 def ExtractPerfOption(args, short_name, long_n 386 def ExtractPerfOption(args, short_name, long_name): 387 return ExtractOption(args, short_name, 387 return ExtractOption(args, short_name, long_name, "perf command ") 388 388 389 def PerfDoubleQuickCommands(cmd, file_name): 389 def PerfDoubleQuickCommands(cmd, file_name): 390 cpu_str = ReadPerfOption(cmd, "C", "cp 390 cpu_str = ReadPerfOption(cmd, "C", "cpu") 391 time_str = ReadPerfOption(cmd, "", "ti 391 time_str = ReadPerfOption(cmd, "", "time") 392 # Use double-quick sampling to determi 392 # Use double-quick sampling to determine trace data density 393 times_cmd = ["perf", "script", "--ns", 393 times_cmd = ["perf", "script", "--ns", "--input", file_name, "--itrace=qqi"] 394 if cpu_str != None and cpu_str != "": 394 if cpu_str != None and cpu_str != "": 395 times_cmd.append(f"--cpu={cpu_ 395 times_cmd.append(f"--cpu={cpu_str}") 396 if time_str != None and time_str != "" 396 if time_str != None and time_str != "": 397 times_cmd.append(f"--time={tim 397 times_cmd.append(f"--time={time_str}") 398 cnts_cmd = list(times_cmd) 398 cnts_cmd = list(times_cmd) 399 cnts_cmd.append("-Fcpu") 399 cnts_cmd.append("-Fcpu") 400 times_cmd.append("-Fcpu,time") 400 times_cmd.append("-Fcpu,time") 401 return cnts_cmd, times_cmd 401 return cnts_cmd, times_cmd 402 402 403 class CPUTimeRange(): 403 class CPUTimeRange(): 404 def __init__(self, cpu): 404 def __init__(self, cpu): 405 self.cpu = cpu 405 self.cpu = cpu 406 self.sample_cnt = 0 406 self.sample_cnt = 0 407 self.time_ranges = None 407 self.time_ranges = None 408 self.interval = 0 408 self.interval = 0 409 self.interval_remaining = 0 409 self.interval_remaining = 0 410 self.remaining = 0 410 self.remaining = 0 411 self.tr_pos = 0 411 self.tr_pos = 0 412 412 413 def CalcTimeRangesByCPU(line, cpu, cpu_time_ra 413 def CalcTimeRangesByCPU(line, cpu, cpu_time_ranges, max_time): 414 cpu_time_range = cpu_time_ranges[cpu] 414 cpu_time_range = cpu_time_ranges[cpu] 415 cpu_time_range.remaining -= 1 415 cpu_time_range.remaining -= 1 416 cpu_time_range.interval_remaining -= 1 416 cpu_time_range.interval_remaining -= 1 417 if cpu_time_range.remaining == 0: 417 if cpu_time_range.remaining == 0: 418 cpu_time_range.time_ranges[cpu 418 cpu_time_range.time_ranges[cpu_time_range.tr_pos][1] = max_time 419 return 419 return 420 if cpu_time_range.interval_remaining = 420 if cpu_time_range.interval_remaining == 0: 421 time = TimeVal(line[1][:-1], 0 421 time = TimeVal(line[1][:-1], 0) 422 time_ranges = cpu_time_range.t 422 time_ranges = cpu_time_range.time_ranges 423 time_ranges[cpu_time_range.tr_ 423 time_ranges[cpu_time_range.tr_pos][1] = time - 1 424 time_ranges.append([time, max_ 424 time_ranges.append([time, max_time]) 425 cpu_time_range.tr_pos += 1 425 cpu_time_range.tr_pos += 1 426 cpu_time_range.interval_remain 426 cpu_time_range.interval_remaining = cpu_time_range.interval 427 427 428 def CountSamplesByCPU(line, cpu, cpu_time_rang 428 def CountSamplesByCPU(line, cpu, cpu_time_ranges): 429 try: 429 try: 430 cpu_time_ranges[cpu].sample_cn 430 cpu_time_ranges[cpu].sample_cnt += 1 431 except: 431 except: 432 print("exception") 432 print("exception") 433 print("cpu", cpu) 433 print("cpu", cpu) 434 print("len(cpu_time_ranges)", 434 print("len(cpu_time_ranges)", len(cpu_time_ranges)) 435 raise 435 raise 436 436 437 def ProcessCommandOutputLines(cmd, per_cpu, fn 437 def ProcessCommandOutputLines(cmd, per_cpu, fn, *x): 438 # Assume CPU number is at beginning of 438 # Assume CPU number is at beginning of line and enclosed by [] 439 pat = re.compile(r"\s*\[[0-9]+\]") 439 pat = re.compile(r"\s*\[[0-9]+\]") 440 p = subprocess.Popen(cmd, stdout=subpr 440 p = subprocess.Popen(cmd, stdout=subprocess.PIPE) 441 while True: 441 while True: 442 line = p.stdout.readline() !! 442 if line := p.stdout.readline(): 443 if line: << 444 line = line.decode("ut 443 line = line.decode("utf-8") 445 if pat.match(line): 444 if pat.match(line): 446 line = line.sp 445 line = line.split() 447 if per_cpu: 446 if per_cpu: 448 # Assu 447 # Assumes CPU number is enclosed by [] 449 cpu = 448 cpu = int(line[0][1:-1]) 450 else: 449 else: 451 cpu = 450 cpu = 0 452 fn(line, cpu, 451 fn(line, cpu, *x) 453 else: 452 else: 454 break 453 break 455 p.wait() 454 p.wait() 456 455 457 def IntersectTimeRanges(new_time_ranges, time_ 456 def IntersectTimeRanges(new_time_ranges, time_ranges): 458 pos = 0 457 pos = 0 459 new_pos = 0 458 new_pos = 0 460 # Can assume len(time_ranges) != 0 and 459 # Can assume len(time_ranges) != 0 and len(new_time_ranges) != 0 461 # Note also, there *must* be at least 460 # Note also, there *must* be at least one intersection. 462 while pos < len(time_ranges) and new_p 461 while pos < len(time_ranges) and new_pos < len(new_time_ranges): 463 # new end < old start => no in 462 # new end < old start => no intersection, remove new 464 if new_time_ranges[new_pos][1] 463 if new_time_ranges[new_pos][1] < time_ranges[pos][0]: 465 del new_time_ranges[ne 464 del new_time_ranges[new_pos] 466 continue 465 continue 467 # new start > old end => no in 466 # new start > old end => no intersection, check next 468 if new_time_ranges[new_pos][0] 467 if new_time_ranges[new_pos][0] > time_ranges[pos][1]: 469 pos += 1 468 pos += 1 470 if pos < len(time_rang 469 if pos < len(time_ranges): 471 continue 470 continue 472 # no next, so remove r 471 # no next, so remove remaining 473 while new_pos < len(ne 472 while new_pos < len(new_time_ranges): 474 del new_time_r 473 del new_time_ranges[new_pos] 475 return 474 return 476 # Found an intersection 475 # Found an intersection 477 # new start < old start => adj 476 # new start < old start => adjust new start = old start 478 if new_time_ranges[new_pos][0] 477 if new_time_ranges[new_pos][0] < time_ranges[pos][0]: 479 new_time_ranges[new_po 478 new_time_ranges[new_pos][0] = time_ranges[pos][0] 480 # new end > old end => keep th 479 # new end > old end => keep the overlap, insert the remainder 481 if new_time_ranges[new_pos][1] 480 if new_time_ranges[new_pos][1] > time_ranges[pos][1]: 482 r = [ time_ranges[pos] 481 r = [ time_ranges[pos][1] + 1, new_time_ranges[new_pos][1] ] 483 new_time_ranges[new_po 482 new_time_ranges[new_pos][1] = time_ranges[pos][1] 484 new_pos += 1 483 new_pos += 1 485 new_time_ranges.insert 484 new_time_ranges.insert(new_pos, r) 486 continue 485 continue 487 # new [start, end] is within o 486 # new [start, end] is within old [start, end] 488 new_pos += 1 487 new_pos += 1 489 488 490 def SplitTimeRangesByTraceDataDensity(time_ran 489 def SplitTimeRangesByTraceDataDensity(time_ranges, cpus, nr, cmd, file_name, per_cpu, min_size, min_interval, verbosity): 491 if verbosity.normal: 490 if verbosity.normal: 492 print("\rAnalyzing...", flush= 491 print("\rAnalyzing...", flush=True, end=" ") 493 if verbosity.verbose: 492 if verbosity.verbose: 494 print() 493 print() 495 cnts_cmd, times_cmd = PerfDoubleQuickC 494 cnts_cmd, times_cmd = PerfDoubleQuickCommands(cmd, file_name) 496 495 497 nr_cpus = cpus[-1] + 1 if per_cpu else 496 nr_cpus = cpus[-1] + 1 if per_cpu else 1 498 if per_cpu: 497 if per_cpu: 499 nr_cpus = cpus[-1] + 1 498 nr_cpus = cpus[-1] + 1 500 cpu_time_ranges = [ CPUTimeRan 499 cpu_time_ranges = [ CPUTimeRange(cpu) for cpu in range(nr_cpus) ] 501 else: 500 else: 502 nr_cpus = 1 501 nr_cpus = 1 503 cpu_time_ranges = [ CPUTimeRan 502 cpu_time_ranges = [ CPUTimeRange(-1) ] 504 503 505 if verbosity.debug: 504 if verbosity.debug: 506 print("nr_cpus", nr_cpus) 505 print("nr_cpus", nr_cpus) 507 print("cnts_cmd", cnts_cmd) 506 print("cnts_cmd", cnts_cmd) 508 print("times_cmd", times_cmd) 507 print("times_cmd", times_cmd) 509 508 510 # Count the number of "double quick" s 509 # Count the number of "double quick" samples per CPU 511 ProcessCommandOutputLines(cnts_cmd, pe 510 ProcessCommandOutputLines(cnts_cmd, per_cpu, CountSamplesByCPU, cpu_time_ranges) 512 511 513 tot = 0 512 tot = 0 514 mx = 0 513 mx = 0 515 for cpu_time_range in cpu_time_ranges: 514 for cpu_time_range in cpu_time_ranges: 516 cnt = cpu_time_range.sample_cn 515 cnt = cpu_time_range.sample_cnt 517 tot += cnt 516 tot += cnt 518 if cnt > mx: 517 if cnt > mx: 519 mx = cnt 518 mx = cnt 520 if verbosity.debug: 519 if verbosity.debug: 521 print("cpu:", cpu_time 520 print("cpu:", cpu_time_range.cpu, "sample_cnt", cnt) 522 521 523 if min_size < 1: 522 if min_size < 1: 524 min_size = 1 523 min_size = 1 525 524 526 if mx < min_size: 525 if mx < min_size: 527 # Too little data to be worth 526 # Too little data to be worth splitting 528 if verbosity.debug: 527 if verbosity.debug: 529 print("Too little data 528 print("Too little data to split by time") 530 if nr == 0: 529 if nr == 0: 531 nr = 1 530 nr = 1 532 return [ SplitTimeRangesIntoN( 531 return [ SplitTimeRangesIntoN(time_ranges, nr, min_interval) ] 533 532 534 if nr: 533 if nr: 535 divisor = nr 534 divisor = nr 536 min_size = 1 535 min_size = 1 537 else: 536 else: 538 divisor = NumberOfCPUs() 537 divisor = NumberOfCPUs() 539 538 540 interval = int(round(tot / divisor, 0) 539 interval = int(round(tot / divisor, 0)) 541 if interval < min_size: 540 if interval < min_size: 542 interval = min_size 541 interval = min_size 543 542 544 if verbosity.debug: 543 if verbosity.debug: 545 print("divisor", divisor) 544 print("divisor", divisor) 546 print("min_size", min_size) 545 print("min_size", min_size) 547 print("interval", interval) 546 print("interval", interval) 548 547 549 min_time = time_ranges[0][0] 548 min_time = time_ranges[0][0] 550 max_time = time_ranges[-1][1] 549 max_time = time_ranges[-1][1] 551 550 552 for cpu_time_range in cpu_time_ranges: 551 for cpu_time_range in cpu_time_ranges: 553 cnt = cpu_time_range.sample_cn 552 cnt = cpu_time_range.sample_cnt 554 if cnt == 0: 553 if cnt == 0: 555 cpu_time_range.time_ra 554 cpu_time_range.time_ranges = copy.deepcopy(time_ranges) 556 continue 555 continue 557 # Adjust target interval for C 556 # Adjust target interval for CPU to give approximately equal interval sizes 558 # Determine number of interval 557 # Determine number of intervals, rounding to nearest integer 559 n = int(round(cnt / interval, 558 n = int(round(cnt / interval, 0)) 560 if n < 1: 559 if n < 1: 561 n = 1 560 n = 1 562 # Determine interval size, rou 561 # Determine interval size, rounding up 563 d, m = divmod(cnt, n) 562 d, m = divmod(cnt, n) 564 if m: 563 if m: 565 d += 1 564 d += 1 566 cpu_time_range.interval = d 565 cpu_time_range.interval = d 567 cpu_time_range.interval_remain 566 cpu_time_range.interval_remaining = d 568 cpu_time_range.remaining = cnt 567 cpu_time_range.remaining = cnt 569 # Init. time ranges for each C 568 # Init. time ranges for each CPU with the start time 570 cpu_time_range.time_ranges = [ 569 cpu_time_range.time_ranges = [ [min_time, max_time] ] 571 570 572 # Set time ranges so that the same num 571 # Set time ranges so that the same number of "double quick" samples 573 # will fall into each time range. 572 # will fall into each time range. 574 ProcessCommandOutputLines(times_cmd, p 573 ProcessCommandOutputLines(times_cmd, per_cpu, CalcTimeRangesByCPU, cpu_time_ranges, max_time) 575 574 576 for cpu_time_range in cpu_time_ranges: 575 for cpu_time_range in cpu_time_ranges: 577 if cpu_time_range.sample_cnt: 576 if cpu_time_range.sample_cnt: 578 IntersectTimeRanges(cp 577 IntersectTimeRanges(cpu_time_range.time_ranges, time_ranges) 579 578 580 return [cpu_time_ranges[cpu].time_rang 579 return [cpu_time_ranges[cpu].time_ranges for cpu in cpus] 581 580 582 def SplitSingleTimeRangeIntoN(time_range, n): 581 def SplitSingleTimeRangeIntoN(time_range, n): 583 if n <= 1: 582 if n <= 1: 584 return [time_range] 583 return [time_range] 585 start = time_range[0] 584 start = time_range[0] 586 end = time_range[1] 585 end = time_range[1] 587 duration = int((end - start + 1) / n) 586 duration = int((end - start + 1) / n) 588 if duration < 1: 587 if duration < 1: 589 return [time_range] 588 return [time_range] 590 time_ranges = [] 589 time_ranges = [] 591 for i in range(n): 590 for i in range(n): 592 time_ranges.append([start, sta 591 time_ranges.append([start, start + duration - 1]) 593 start += duration 592 start += duration 594 time_ranges[-1][1] = end 593 time_ranges[-1][1] = end 595 return time_ranges 594 return time_ranges 596 595 597 def TimeRangeDuration(r): 596 def TimeRangeDuration(r): 598 return r[1] - r[0] + 1 597 return r[1] - r[0] + 1 599 598 600 def TotalDuration(time_ranges): 599 def TotalDuration(time_ranges): 601 duration = 0 600 duration = 0 602 for r in time_ranges: 601 for r in time_ranges: 603 duration += TimeRangeDuration( 602 duration += TimeRangeDuration(r) 604 return duration 603 return duration 605 604 606 def SplitTimeRangesByInterval(time_ranges, int 605 def SplitTimeRangesByInterval(time_ranges, interval): 607 new_ranges = [] 606 new_ranges = [] 608 for r in time_ranges: 607 for r in time_ranges: 609 duration = TimeRangeDuration(r 608 duration = TimeRangeDuration(r) 610 n = duration / interval 609 n = duration / interval 611 n = int(round(n, 0)) 610 n = int(round(n, 0)) 612 new_ranges += SplitSingleTimeR 611 new_ranges += SplitSingleTimeRangeIntoN(r, n) 613 return new_ranges 612 return new_ranges 614 613 615 def SplitTimeRangesIntoN(time_ranges, n, min_i 614 def SplitTimeRangesIntoN(time_ranges, n, min_interval): 616 if n <= len(time_ranges): 615 if n <= len(time_ranges): 617 return time_ranges 616 return time_ranges 618 duration = TotalDuration(time_ranges) 617 duration = TotalDuration(time_ranges) 619 interval = duration / n 618 interval = duration / n 620 if interval < min_interval: 619 if interval < min_interval: 621 interval = min_interval 620 interval = min_interval 622 return SplitTimeRangesByInterval(time_ 621 return SplitTimeRangesByInterval(time_ranges, interval) 623 622 624 def RecombineTimeRanges(tr): 623 def RecombineTimeRanges(tr): 625 new_tr = copy.deepcopy(tr) 624 new_tr = copy.deepcopy(tr) 626 n = len(new_tr) 625 n = len(new_tr) 627 i = 1 626 i = 1 628 while i < len(new_tr): 627 while i < len(new_tr): 629 # if prev end + 1 == cur start 628 # if prev end + 1 == cur start, combine them 630 if new_tr[i - 1][1] + 1 == new 629 if new_tr[i - 1][1] + 1 == new_tr[i][0]: 631 new_tr[i][0] = new_tr[ 630 new_tr[i][0] = new_tr[i - 1][0] 632 del new_tr[i - 1] 631 del new_tr[i - 1] 633 else: 632 else: 634 i += 1 633 i += 1 635 return new_tr 634 return new_tr 636 635 637 def OpenTimeRangeEnds(time_ranges, min_time, m 636 def OpenTimeRangeEnds(time_ranges, min_time, max_time): 638 if time_ranges[0][0] <= min_time: 637 if time_ranges[0][0] <= min_time: 639 time_ranges[0][0] = None 638 time_ranges[0][0] = None 640 if time_ranges[-1][1] >= max_time: 639 if time_ranges[-1][1] >= max_time: 641 time_ranges[-1][1] = None 640 time_ranges[-1][1] = None 642 641 643 def BadTimeStr(time_str): 642 def BadTimeStr(time_str): 644 raise Exception(f"perf command bad tim 643 raise Exception(f"perf command bad time option: '{time_str}'\nCheck also 'time of first sample' and 'time of last sample' in perf script --header-only") 645 644 646 def ValidateTimeRanges(time_ranges, time_str): 645 def ValidateTimeRanges(time_ranges, time_str): 647 n = len(time_ranges) 646 n = len(time_ranges) 648 for i in range(n): 647 for i in range(n): 649 start = time_ranges[i][0] 648 start = time_ranges[i][0] 650 end = time_ranges[i][1] 649 end = time_ranges[i][1] 651 if i != 0 and start <= time_ra 650 if i != 0 and start <= time_ranges[i - 1][1]: 652 BadTimeStr(time_str) 651 BadTimeStr(time_str) 653 if start > end: 652 if start > end: 654 BadTimeStr(time_str) 653 BadTimeStr(time_str) 655 654 656 def TimeVal(s, dflt): 655 def TimeVal(s, dflt): 657 s = s.strip() 656 s = s.strip() 658 if s == "": 657 if s == "": 659 return dflt 658 return dflt 660 a = s.split(".") 659 a = s.split(".") 661 if len(a) > 2: 660 if len(a) > 2: 662 raise Exception(f"Bad time val 661 raise Exception(f"Bad time value'{s}'") 663 x = int(a[0]) 662 x = int(a[0]) 664 if x < 0: 663 if x < 0: 665 raise Exception("Negative time 664 raise Exception("Negative time not allowed") 666 x *= 1000000000 665 x *= 1000000000 667 if len(a) > 1: 666 if len(a) > 1: 668 x += int((a[1] + "000000000")[ 667 x += int((a[1] + "000000000")[:9]) 669 return x 668 return x 670 669 671 def BadCPUStr(cpu_str): 670 def BadCPUStr(cpu_str): 672 raise Exception(f"perf command bad cpu 671 raise Exception(f"perf command bad cpu option: '{cpu_str}'\nCheck also 'nrcpus avail' in perf script --header-only") 673 672 674 def ParseTimeStr(time_str, min_time, max_time) 673 def ParseTimeStr(time_str, min_time, max_time): 675 if time_str == None or time_str == "": 674 if time_str == None or time_str == "": 676 return [[min_time, max_time]] 675 return [[min_time, max_time]] 677 time_ranges = [] 676 time_ranges = [] 678 for r in time_str.split(): 677 for r in time_str.split(): 679 a = r.split(",") 678 a = r.split(",") 680 if len(a) != 2: 679 if len(a) != 2: 681 BadTimeStr(time_str) 680 BadTimeStr(time_str) 682 try: 681 try: 683 start = TimeVal(a[0], 682 start = TimeVal(a[0], min_time) 684 end = TimeVal(a[1], 683 end = TimeVal(a[1], max_time) 685 except: 684 except: 686 BadTimeStr(time_str) 685 BadTimeStr(time_str) 687 time_ranges.append([start, end 686 time_ranges.append([start, end]) 688 ValidateTimeRanges(time_ranges, time_s 687 ValidateTimeRanges(time_ranges, time_str) 689 return time_ranges 688 return time_ranges 690 689 691 def ParseCPUStr(cpu_str, nr_cpus): 690 def ParseCPUStr(cpu_str, nr_cpus): 692 if cpu_str == None or cpu_str == "": 691 if cpu_str == None or cpu_str == "": 693 return [-1] 692 return [-1] 694 cpus = [] 693 cpus = [] 695 for r in cpu_str.split(","): 694 for r in cpu_str.split(","): 696 a = r.split("-") 695 a = r.split("-") 697 if len(a) < 1 or len(a) > 2: 696 if len(a) < 1 or len(a) > 2: 698 BadCPUStr(cpu_str) 697 BadCPUStr(cpu_str) 699 try: 698 try: 700 start = int(a[0].strip 699 start = int(a[0].strip()) 701 if len(a) > 1: 700 if len(a) > 1: 702 end = int(a[1] 701 end = int(a[1].strip()) 703 else: 702 else: 704 end = start 703 end = start 705 except: 704 except: 706 BadCPUStr(cpu_str) 705 BadCPUStr(cpu_str) 707 if start < 0 or end < 0 or end 706 if start < 0 or end < 0 or end < start or end >= nr_cpus: 708 BadCPUStr(cpu_str) 707 BadCPUStr(cpu_str) 709 cpus.extend(range(start, end + 708 cpus.extend(range(start, end + 1)) 710 cpus = list(set(cpus)) # Remove duplic 709 cpus = list(set(cpus)) # Remove duplicates 711 cpus.sort() 710 cpus.sort() 712 return cpus 711 return cpus 713 712 714 class ParallelPerf(): 713 class ParallelPerf(): 715 714 716 def __init__(self, a): 715 def __init__(self, a): 717 for arg_name in vars(a): 716 for arg_name in vars(a): 718 setattr(self, arg_name 717 setattr(self, arg_name, getattr(a, arg_name)) 719 self.orig_nr = self.nr 718 self.orig_nr = self.nr 720 self.orig_cmd = list(self.cmd) 719 self.orig_cmd = list(self.cmd) 721 self.perf = self.cmd[0] 720 self.perf = self.cmd[0] 722 if os.path.exists(self.output_ 721 if os.path.exists(self.output_dir): 723 raise Exception(f"Outp 722 raise Exception(f"Output '{self.output_dir}' already exists") 724 if self.jobs < 0 or self.nr < 723 if self.jobs < 0 or self.nr < 0 or self.interval < 0: 725 raise Exception("Bad o 724 raise Exception("Bad options (negative values): try -h option for help") 726 if self.nr != 0 and self.inter 725 if self.nr != 0 and self.interval != 0: 727 raise Exception("Canno 726 raise Exception("Cannot specify number of time subdivisions and time interval") 728 if self.jobs == 0: 727 if self.jobs == 0: 729 self.jobs = NumberOfCP 728 self.jobs = NumberOfCPUs() 730 if self.nr == 0 and self.inter 729 if self.nr == 0 and self.interval == 0: 731 if self.per_cpu: 730 if self.per_cpu: 732 self.nr = 1 731 self.nr = 1 733 else: 732 else: 734 self.nr = self 733 self.nr = self.jobs 735 734 736 def Init(self): 735 def Init(self): 737 if self.verbosity.debug: 736 if self.verbosity.debug: 738 print("cmd", self.cmd) 737 print("cmd", self.cmd) 739 self.file_name = DetermineInpu 738 self.file_name = DetermineInputFileName(self.cmd) 740 self.hdr = ReadHeader(self.per 739 self.hdr = ReadHeader(self.perf, self.file_name) 741 self.hdr_dict = ParseHeader(se 740 self.hdr_dict = ParseHeader(self.hdr) 742 self.cmd_line = HeaderField(se 741 self.cmd_line = HeaderField(self.hdr_dict, "cmdline") 743 742 744 def ExtractTimeInfo(self): 743 def ExtractTimeInfo(self): 745 self.min_time = TimeVal(Header 744 self.min_time = TimeVal(HeaderField(self.hdr_dict, "time of first sample"), 0) 746 self.max_time = TimeVal(Header 745 self.max_time = TimeVal(HeaderField(self.hdr_dict, "time of last sample"), 0) 747 self.time_str = ExtractPerfOpt 746 self.time_str = ExtractPerfOption(self.cmd, "", "time") 748 self.time_ranges = ParseTimeSt 747 self.time_ranges = ParseTimeStr(self.time_str, self.min_time, self.max_time) 749 if self.verbosity.debug: 748 if self.verbosity.debug: 750 print("time_ranges", s 749 print("time_ranges", self.time_ranges) 751 750 752 def ExtractCPUInfo(self): 751 def ExtractCPUInfo(self): 753 if self.per_cpu: 752 if self.per_cpu: 754 nr_cpus = int(HeaderFi 753 nr_cpus = int(HeaderField(self.hdr_dict, "nrcpus avail")) 755 self.cpu_str = Extract 754 self.cpu_str = ExtractPerfOption(self.cmd, "C", "cpu") 756 if self.cpu_str == Non 755 if self.cpu_str == None or self.cpu_str == "": 757 self.cpus = [ 756 self.cpus = [ x for x in range(nr_cpus) ] 758 else: 757 else: 759 self.cpus = Pa 758 self.cpus = ParseCPUStr(self.cpu_str, nr_cpus) 760 else: 759 else: 761 self.cpu_str = None 760 self.cpu_str = None 762 self.cpus = [-1] 761 self.cpus = [-1] 763 if self.verbosity.debug: 762 if self.verbosity.debug: 764 print("cpus", self.cpu 763 print("cpus", self.cpus) 765 764 766 def IsIntelPT(self): 765 def IsIntelPT(self): 767 return self.cmd_line.find("int 766 return self.cmd_line.find("intel_pt") >= 0 768 767 769 def SplitTimeRanges(self): 768 def SplitTimeRanges(self): 770 if self.IsIntelPT() and self.i 769 if self.IsIntelPT() and self.interval == 0: 771 self.split_time_ranges 770 self.split_time_ranges_for_each_cpu = \ 772 SplitTimeRange 771 SplitTimeRangesByTraceDataDensity(self.time_ranges, self.cpus, self.orig_nr, 773 772 self.orig_cmd, self.file_name, self.per_cpu, 774 773 self.min_size, self.min_interval, self.verbosity) 775 elif self.nr: 774 elif self.nr: 776 self.split_time_ranges 775 self.split_time_ranges_for_each_cpu = [ SplitTimeRangesIntoN(self.time_ranges, self.nr, self.min_interval) ] 777 else: 776 else: 778 self.split_time_ranges 777 self.split_time_ranges_for_each_cpu = [ SplitTimeRangesByInterval(self.time_ranges, self.interval) ] 779 778 780 def CheckTimeRanges(self): 779 def CheckTimeRanges(self): 781 for tr in self.split_time_rang 780 for tr in self.split_time_ranges_for_each_cpu: 782 # Re-combined time ran 781 # Re-combined time ranges should be the same 783 new_tr = RecombineTime 782 new_tr = RecombineTimeRanges(tr) 784 if new_tr != self.time 783 if new_tr != self.time_ranges: 785 if self.verbos 784 if self.verbosity.debug: 786 print( 785 print("tr", tr) 787 print( 786 print("new_tr", new_tr) 788 raise Exceptio 787 raise Exception("Self test failed!") 789 788 790 def OpenTimeRangeEnds(self): 789 def OpenTimeRangeEnds(self): 791 for time_ranges in self.split_ 790 for time_ranges in self.split_time_ranges_for_each_cpu: 792 OpenTimeRangeEnds(time 791 OpenTimeRangeEnds(time_ranges, self.min_time, self.max_time) 793 792 794 def CreateWorkList(self): 793 def CreateWorkList(self): 795 self.worklist = CreateWorkList 794 self.worklist = CreateWorkList(self.cmd, self.pipe_to, self.output_dir, self.cpus, self.split_time_ranges_for_each_cpu) 796 795 797 def PerfDataRecordedPerCPU(self): 796 def PerfDataRecordedPerCPU(self): 798 if "--per-thread" in self.cmd_ 797 if "--per-thread" in self.cmd_line.split(): 799 return False 798 return False 800 return True 799 return True 801 800 802 def DefaultToPerCPU(self): 801 def DefaultToPerCPU(self): 803 # --no-per-cpu option takes pr 802 # --no-per-cpu option takes precedence 804 if self.no_per_cpu: 803 if self.no_per_cpu: 805 return False 804 return False 806 if not self.PerfDataRecordedPe 805 if not self.PerfDataRecordedPerCPU(): 807 return False 806 return False 808 # Default to per-cpu for Intel 807 # Default to per-cpu for Intel PT data that was recorded per-cpu, 809 # because decoding can be done 808 # because decoding can be done for each CPU separately. 810 if self.IsIntelPT(): 809 if self.IsIntelPT(): 811 return True 810 return True 812 return False 811 return False 813 812 814 def Config(self): 813 def Config(self): 815 self.Init() 814 self.Init() 816 self.ExtractTimeInfo() 815 self.ExtractTimeInfo() 817 if not self.per_cpu: 816 if not self.per_cpu: 818 self.per_cpu = self.De 817 self.per_cpu = self.DefaultToPerCPU() 819 if self.verbosity.debug: 818 if self.verbosity.debug: 820 print("per_cpu", self. 819 print("per_cpu", self.per_cpu) 821 self.ExtractCPUInfo() 820 self.ExtractCPUInfo() 822 self.SplitTimeRanges() 821 self.SplitTimeRanges() 823 if self.verbosity.self_test: 822 if self.verbosity.self_test: 824 self.CheckTimeRanges() 823 self.CheckTimeRanges() 825 # Prefer open-ended time range 824 # Prefer open-ended time range to starting / ending with min_time / max_time resp. 826 self.OpenTimeRangeEnds() 825 self.OpenTimeRangeEnds() 827 self.CreateWorkList() 826 self.CreateWorkList() 828 827 829 def Run(self): 828 def Run(self): 830 if self.dry_run: 829 if self.dry_run: 831 print(len(self.worklis 830 print(len(self.worklist),"jobs:") 832 for w in self.worklist 831 for w in self.worklist: 833 print(w.Comman 832 print(w.Command()) 834 return True 833 return True 835 result = RunWork(self.worklist 834 result = RunWork(self.worklist, self.jobs, verbosity=self.verbosity) 836 if self.verbosity.verbose: 835 if self.verbosity.verbose: 837 print(glb_prog_name, " 836 print(glb_prog_name, "done") 838 return result 837 return result 839 838 840 def RunParallelPerf(a): 839 def RunParallelPerf(a): 841 pp = ParallelPerf(a) 840 pp = ParallelPerf(a) 842 pp.Config() 841 pp.Config() 843 return pp.Run() 842 return pp.Run() 844 843 845 def Main(args): 844 def Main(args): 846 ap = argparse.ArgumentParser( 845 ap = argparse.ArgumentParser( 847 prog=glb_prog_name, formatter_ 846 prog=glb_prog_name, formatter_class = argparse.RawDescriptionHelpFormatter, 848 description = 847 description = 849 """ 848 """ 850 Run a perf script command multiple times in pa 849 Run a perf script command multiple times in parallel, using perf script options 851 --cpu and --time so that each job processes a 850 --cpu and --time so that each job processes a different chunk of the data. 852 """, 851 """, 853 epilog = 852 epilog = 854 """ 853 """ 855 Follow the options by '--' and then the perf s 854 Follow the options by '--' and then the perf script command e.g. 856 855 857 $ perf record -a -- sleep 10 856 $ perf record -a -- sleep 10 858 $ parallel-perf.py --nr=4 -- perf scri 857 $ parallel-perf.py --nr=4 -- perf script --ns 859 All jobs finished successfully 858 All jobs finished successfully 860 $ tree parallel-perf-output/ 859 $ tree parallel-perf-output/ 861 parallel-perf-output/ 860 parallel-perf-output/ 862 ├── time-range-0 861 ├── time-range-0 863 │ ├── cmd.txt 862 │ ├── cmd.txt 864 │ └── out.txt 863 │ └── out.txt 865 ├── time-range-1 864 ├── time-range-1 866 │ ├── cmd.txt 865 │ ├── cmd.txt 867 │ └── out.txt 866 │ └── out.txt 868 ├── time-range-2 867 ├── time-range-2 869 │ ├── cmd.txt 868 │ ├── cmd.txt 870 │ └── out.txt 869 │ └── out.txt 871 └── time-range-3 870 └── time-range-3 872 ├── cmd.txt 871 ├── cmd.txt 873 └── out.txt 872 └── out.txt 874 $ find parallel-perf-output -name cmd. 873 $ find parallel-perf-output -name cmd.txt | sort | xargs grep -H . 875 parallel-perf-output/time-range-0/cmd. 874 parallel-perf-output/time-range-0/cmd.txt:perf script --time=,9466.504461499 --ns 876 parallel-perf-output/time-range-1/cmd. 875 parallel-perf-output/time-range-1/cmd.txt:perf script --time=9466.504461500,9469.005396999 --ns 877 parallel-perf-output/time-range-2/cmd. 876 parallel-perf-output/time-range-2/cmd.txt:perf script --time=9469.005397000,9471.506332499 --ns 878 parallel-perf-output/time-range-3/cmd. 877 parallel-perf-output/time-range-3/cmd.txt:perf script --time=9471.506332500, --ns 879 878 880 Any perf script command can be used, including 879 Any perf script command can be used, including the use of perf script options 881 --dlfilter and --script, so that the benefit o 880 --dlfilter and --script, so that the benefit of running parallel jobs 882 naturally extends to them also. 881 naturally extends to them also. 883 882 884 If option --pipe-to is used, standard output i 883 If option --pipe-to is used, standard output is first piped through that 885 command. Beware, if the command fails (e.g. gr 884 command. Beware, if the command fails (e.g. grep with no matches), it will be 886 considered a fatal error. 885 considered a fatal error. 887 886 888 Final standard output is redirected to files n 887 Final standard output is redirected to files named out.txt in separate 889 subdirectories under the output directory. Sim 888 subdirectories under the output directory. Similarly, standard error is 890 written to files named err.txt. In addition, f 889 written to files named err.txt. In addition, files named cmd.txt contain the 891 corresponding perf script command. After proce 890 corresponding perf script command. After processing, err.txt files are removed 892 if they are empty. 891 if they are empty. 893 892 894 If any job exits with a non-zero exit code, th 893 If any job exits with a non-zero exit code, then all jobs are killed and no 895 more are started. A message is printed if any 894 more are started. A message is printed if any job results in a non-empty 896 err.txt file. 895 err.txt file. 897 896 898 There is a separate output subdirectory for ea 897 There is a separate output subdirectory for each time range. If the --per-cpu 899 option is used, these are further grouped unde 898 option is used, these are further grouped under cpu-n subdirectories, e.g. 900 899 901 $ parallel-perf.py --per-cpu --nr=2 -- 900 $ parallel-perf.py --per-cpu --nr=2 -- perf script --ns --cpu=0,1 902 All jobs finished successfully 901 All jobs finished successfully 903 $ tree parallel-perf-output 902 $ tree parallel-perf-output 904 parallel-perf-output/ 903 parallel-perf-output/ 905 ├── cpu-0 904 ├── cpu-0 906 │ ├── time-range-0 905 │ ├── time-range-0 907 │ │ ├── cmd.txt 906 │ │ ├── cmd.txt 908 │ │ └── out.txt 907 │ │ └── out.txt 909 │ └── time-range-1 908 │ └── time-range-1 910 │ ├── cmd.txt 909 │ ├── cmd.txt 911 │ └── out.txt 910 │ └── out.txt 912 └── cpu-1 911 └── cpu-1 913 ├── time-range-0 912 ├── time-range-0 914 │ ├── cmd.txt 913 │ ├── cmd.txt 915 │ └── out.txt 914 │ └── out.txt 916 └── time-range-1 915 └── time-range-1 917 ├── cmd.txt 916 ├── cmd.txt 918 └── out.txt 917 └── out.txt 919 $ find parallel-perf-output -name cmd. 918 $ find parallel-perf-output -name cmd.txt | sort | xargs grep -H . 920 parallel-perf-output/cpu-0/time-range- 919 parallel-perf-output/cpu-0/time-range-0/cmd.txt:perf script --cpu=0 --time=,9469.005396999 --ns 921 parallel-perf-output/cpu-0/time-range- 920 parallel-perf-output/cpu-0/time-range-1/cmd.txt:perf script --cpu=0 --time=9469.005397000, --ns 922 parallel-perf-output/cpu-1/time-range- 921 parallel-perf-output/cpu-1/time-range-0/cmd.txt:perf script --cpu=1 --time=,9469.005396999 --ns 923 parallel-perf-output/cpu-1/time-range- 922 parallel-perf-output/cpu-1/time-range-1/cmd.txt:perf script --cpu=1 --time=9469.005397000, --ns 924 923 925 Subdivisions of time range, and cpus if the -- 924 Subdivisions of time range, and cpus if the --per-cpu option is used, are 926 expressed by the --time and --cpu perf script 925 expressed by the --time and --cpu perf script options respectively. If the 927 supplied perf script command has a --time opti 926 supplied perf script command has a --time option, then that time range is 928 subdivided, otherwise the time range given by 927 subdivided, otherwise the time range given by 'time of first sample' to 929 'time of last sample' is used (refer perf scri 928 'time of last sample' is used (refer perf script --header-only). Similarly, the 930 supplied perf script command may provide a --c 929 supplied perf script command may provide a --cpu option, and only those CPUs 931 will be processed. 930 will be processed. 932 931 933 To prevent time intervals becoming too small, 932 To prevent time intervals becoming too small, the --min-interval option can 934 be used. 933 be used. 935 934 936 Note there is special handling for processing 935 Note there is special handling for processing Intel PT traces. If an interval is 937 not specified and the perf record command cont 936 not specified and the perf record command contained the intel_pt event, then the 938 time range will be subdivided in order to prod 937 time range will be subdivided in order to produce subdivisions that contain 939 approximately the same amount of trace data. T 938 approximately the same amount of trace data. That is accomplished by counting 940 double-quick (--itrace=qqi) samples, and choos 939 double-quick (--itrace=qqi) samples, and choosing time ranges that encompass 941 approximately the same number of samples. In t 940 approximately the same number of samples. In that case, time ranges may not be 942 the same for each CPU processed. For Intel PT, 941 the same for each CPU processed. For Intel PT, --per-cpu is the default, but 943 that can be overridden by --no-per-cpu. Note, 942 that can be overridden by --no-per-cpu. Note, for Intel PT, double-quick 944 decoding produces 1 sample for each PSB synchr 943 decoding produces 1 sample for each PSB synchronization packet, which in turn 945 come after a certain number of bytes output, d 944 come after a certain number of bytes output, determined by psb_period (refer 946 perf Intel PT documentation). The minimum numb 945 perf Intel PT documentation). The minimum number of double-quick samples that 947 will define a time range can be set by the --m 946 will define a time range can be set by the --min_size option, which defaults to 948 64. 947 64. 949 """) 948 """) 950 ap.add_argument("-o", "--output-dir", 949 ap.add_argument("-o", "--output-dir", default="parallel-perf-output", help="output directory (default 'parallel-perf-output')") 951 ap.add_argument("-j", "--jobs", type=i 950 ap.add_argument("-j", "--jobs", type=int, default=0, help="maximum number of jobs to run in parallel at one time (default is the number of CPUs)") 952 ap.add_argument("-n", "--nr", type=int 951 ap.add_argument("-n", "--nr", type=int, default=0, help="number of time subdivisions (default is the number of jobs)") 953 ap.add_argument("-i", "--interval", ty 952 ap.add_argument("-i", "--interval", type=float, default=0, help="subdivide the time range using this time interval (in seconds e.g. 0.1 for a tenth of a second)") 954 ap.add_argument("-c", "--per-cpu", act 953 ap.add_argument("-c", "--per-cpu", action="store_true", help="process data for each CPU in parallel") 955 ap.add_argument("-m", "--min-interval" 954 ap.add_argument("-m", "--min-interval", type=float, default=glb_min_interval, help=f"minimum interval (default {glb_min_interval} seconds)") 956 ap.add_argument("-p", "--pipe-to", hel 955 ap.add_argument("-p", "--pipe-to", help="command to pipe output to (optional)") 957 ap.add_argument("-N", "--no-per-cpu", 956 ap.add_argument("-N", "--no-per-cpu", action="store_true", help="do not process data for each CPU in parallel") 958 ap.add_argument("-b", "--min_size", ty 957 ap.add_argument("-b", "--min_size", type=int, default=glb_min_samples, help="minimum data size (for Intel PT in PSBs)") 959 ap.add_argument("-D", "--dry-run", act 958 ap.add_argument("-D", "--dry-run", action="store_true", help="do not run any jobs, just show the perf script commands") 960 ap.add_argument("-q", "--quiet", actio 959 ap.add_argument("-q", "--quiet", action="store_true", help="do not print any messages except errors") 961 ap.add_argument("-v", "--verbose", act 960 ap.add_argument("-v", "--verbose", action="store_true", help="print more messages") 962 ap.add_argument("-d", "--debug", actio 961 ap.add_argument("-d", "--debug", action="store_true", help="print debugging messages") 963 cmd_line = list(args) 962 cmd_line = list(args) 964 try: 963 try: 965 split_pos = cmd_line.index("-- 964 split_pos = cmd_line.index("--") 966 cmd = cmd_line[split_pos + 1:] 965 cmd = cmd_line[split_pos + 1:] 967 args = cmd_line[:split_pos] 966 args = cmd_line[:split_pos] 968 except: 967 except: 969 cmd = None 968 cmd = None 970 args = cmd_line 969 args = cmd_line 971 a = ap.parse_args(args=args[1:]) 970 a = ap.parse_args(args=args[1:]) 972 a.cmd = cmd 971 a.cmd = cmd 973 a.verbosity = Verbosity(a.quiet, a.ver 972 a.verbosity = Verbosity(a.quiet, a.verbose, a.debug) 974 try: 973 try: 975 if a.cmd == None: 974 if a.cmd == None: 976 if len(args) <= 1: 975 if len(args) <= 1: 977 ap.print_help( 976 ap.print_help() 978 return True 977 return True 979 raise Exception("Comma 978 raise Exception("Command line must contain '--' before perf command") 980 return RunParallelPerf(a) 979 return RunParallelPerf(a) 981 except Exception as e: 980 except Exception as e: 982 print("Fatal error: ", str(e)) 981 print("Fatal error: ", str(e)) 983 if a.debug: 982 if a.debug: 984 raise 983 raise 985 return False 984 return False 986 985 987 if __name__ == "__main__": 986 if __name__ == "__main__": 988 if not Main(sys.argv): 987 if not Main(sys.argv): 989 sys.exit(1) 988 sys.exit(1)
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.