1 #!/usr/bin/env python3 2 # SPDX-License-Identifier: GPL-2.0-only 3 # Copyright (C) 2024 ARM Ltd. 4 # 5 # Utility providing smaps-like output detailin 6 # For more info, run: 7 # ./thpmaps --help 8 # 9 # Requires numpy: 10 # pip3 install numpy 11 12 13 import argparse 14 import collections 15 import math 16 import os 17 import re 18 import resource 19 import shutil 20 import sys 21 import textwrap 22 import time 23 import numpy as np 24 25 26 with open('/sys/kernel/mm/transparent_hugepage 27 PAGE_SIZE = resource.getpagesize() 28 PAGE_SHIFT = int(math.log2(PAGE_SIZE)) 29 PMD_SIZE = int(f.read()) 30 PMD_ORDER = int(math.log2(PMD_SIZE / PAGE_ 31 32 33 def align_forward(v, a): 34 return (v + (a - 1)) & ~(a - 1) 35 36 37 def align_offset(v, a): 38 return v & (a - 1) 39 40 41 def kbnr(kb): 42 # Convert KB to number of pages. 43 return (kb << 10) >> PAGE_SHIFT 44 45 46 def nrkb(nr): 47 # Convert number of pages to KB. 48 return (nr << PAGE_SHIFT) >> 10 49 50 51 def odkb(order): 52 # Convert page order to KB. 53 return (PAGE_SIZE << order) >> 10 54 55 56 def cont_ranges_all(search, index): 57 # Given a list of arrays, find the ranges 58 # incrementing in all arrays. all arrays i 59 # same size. 60 sz = len(search[0]) 61 r = np.full(sz, 2) 62 d = np.diff(search[0]) == 1 63 for dd in [np.diff(arr) == 1 for arr in se 64 d &= dd 65 r[1:] -= d 66 r[:-1] -= d 67 return [np.repeat(arr, r).reshape(-1, 2) f 68 69 70 class ArgException(Exception): 71 pass 72 73 74 class FileIOException(Exception): 75 pass 76 77 78 class BinArrayFile: 79 # Base class used to read /proc/<pid>/page 80 # numpy array. Use inherrited class in a w 81 # closed when it goes out of scope. 82 def __init__(self, filename, element_size) 83 self.element_size = element_size 84 self.filename = filename 85 self.fd = os.open(self.filename, os.O_ 86 87 def cleanup(self): 88 os.close(self.fd) 89 90 def __enter__(self): 91 return self 92 93 def __exit__(self, exc_type, exc_val, exc_ 94 self.cleanup() 95 96 def _readin(self, offset, buffer): 97 length = os.preadv(self.fd, (buffer,), 98 if len(buffer) != length: 99 raise FileIOException('error: {} f 100 .format(self.filen 101 102 def _toarray(self, buf): 103 assert(self.element_size == 8) 104 return np.frombuffer(buf, dtype=np.uin 105 106 def getv(self, vec): 107 vec *= self.element_size 108 offsets = vec[:, 0] 109 lengths = (np.diff(vec) + self.element 110 buf = bytearray(int(np.sum(lengths))) 111 view = memoryview(buf) 112 pos = 0 113 for offset, length in zip(offsets, len 114 offset = int(offset) 115 length = int(length) 116 self._readin(offset, view[pos:pos+ 117 pos += length 118 return self._toarray(buf) 119 120 def get(self, index, nr=1): 121 offset = index * self.element_size 122 length = nr * self.element_size 123 buf = bytearray(length) 124 self._readin(offset, buf) 125 return self._toarray(buf) 126 127 128 PM_PAGE_PRESENT = 1 << 63 129 PM_PFN_MASK = (1 << 55) - 1 130 131 class PageMap(BinArrayFile): 132 # Read ranges of a given pid's pagemap int 133 def __init__(self, pid='self'): 134 super().__init__(f'/proc/{pid}/pagemap 135 136 137 KPF_ANON = 1 << 12 138 KPF_COMPOUND_HEAD = 1 << 15 139 KPF_COMPOUND_TAIL = 1 << 16 140 KPF_THP = 1 << 22 141 142 class KPageFlags(BinArrayFile): 143 # Read ranges of /proc/kpageflags into a n 144 def __init__(self): 145 super().__init__(f'/proc/kpageflags', 146 147 148 vma_all_stats = set([ 149 "Size", 150 "Rss", 151 "Pss", 152 "Pss_Dirty", 153 "Shared_Clean", 154 "Shared_Dirty", 155 "Private_Clean", 156 "Private_Dirty", 157 "Referenced", 158 "Anonymous", 159 "KSM", 160 "LazyFree", 161 "AnonHugePages", 162 "ShmemPmdMapped", 163 "FilePmdMapped", 164 "Shared_Hugetlb", 165 "Private_Hugetlb", 166 "Swap", 167 "SwapPss", 168 "Locked", 169 ]) 170 171 vma_min_stats = set([ 172 "Rss", 173 "Anonymous", 174 "AnonHugePages", 175 "ShmemPmdMapped", 176 "FilePmdMapped", 177 ]) 178 179 VMA = collections.namedtuple('VMA', [ 180 'name', 181 'start', 182 'end', 183 'read', 184 'write', 185 'execute', 186 'private', 187 'pgoff', 188 'major', 189 'minor', 190 'inode', 191 'stats', 192 ]) 193 194 class VMAList: 195 # A container for VMAs, parsed from /proc/ 196 # instance to receive VMAs. 197 def __init__(self, pid='self', stats=[]): 198 self.vmas = [] 199 with open(f'/proc/{pid}/smaps', 'r') a 200 for line in file: 201 elements = line.split() 202 if '-' in elements[0]: 203 start, end = map(lambda x: 204 major, minor = map(lambda 205 self.vmas.append(VMA( 206 name=elements[5] if le 207 start=start, 208 end=end, 209 read=elements[1][0] == 210 write=elements[1][1] = 211 execute=elements[1][2] 212 private=elements[1][3] 213 pgoff=int(elements[2], 214 major=major, 215 minor=minor, 216 inode=int(elements[4], 217 stats={}, 218 )) 219 else: 220 param = elements[0][:-1] 221 if param in stats: 222 value = int(elements[1 223 self.vmas[-1].stats[pa 224 225 def __iter__(self): 226 yield from self.vmas 227 228 229 def thp_parse(vma, kpageflags, ranges, indexes 230 # Given 4 same-sized arrays representing a 231 # by THPs (vfns: virtual frame numbers, pf 232 # True if page is anonymous, heads: True i 233 # dictionary of statistics describing the 234 stats = { 235 'file': { 236 'partial': 0, 237 'aligned': [0] * (PMD_ORDER + 1), 238 'unaligned': [0] * (PMD_ORDER + 1) 239 }, 240 'anon': { 241 'partial': 0, 242 'aligned': [0] * (PMD_ORDER + 1), 243 'unaligned': [0] * (PMD_ORDER + 1) 244 }, 245 } 246 247 for rindex, rpfn in zip(ranges[0], ranges[ 248 index_next = int(rindex[0]) 249 index_end = int(rindex[1]) + 1 250 pfn_end = int(rpfn[1]) + 1 251 252 folios = indexes[index_next:index_end] 253 254 # Account pages for any partially mapp 255 # the first page of the range is a tai 256 nr = (int(folios[0]) if len(folios) el 257 stats['anon' if anons[index_next] else 258 259 # Account pages for any partially mapp 260 # the next page after the range is a t 261 if len(folios): 262 flags = int(kpageflags.get(pfn_end 263 if flags & KPF_COMPOUND_TAIL: 264 nr = index_end - int(folios[-1 265 folios = folios[:-1] 266 index_end -= nr 267 stats['anon' if anons[index_en 268 269 # Account fully mapped THPs in the mid 270 if len(folios): 271 folio_nrs = np.append(np.diff(foli 272 folio_orders = np.log2(folio_nrs). 273 for index, order in zip(folios, fo 274 index = int(index) 275 order = int(order) 276 nr = 1 << order 277 vfn = int(vfns[index]) 278 align = 'aligned' if align_for 279 anon = 'anon' if anons[index] 280 stats[anon][align][order] += n 281 282 # Account PMD-mapped THPs spearately, so f 283 # race between acquiring the smaps stats a 284 # could be deallocated. So clamp to zero i 285 anon_pmd_mapped = vma.stats['AnonHugePages 286 file_pmd_mapped = vma.stats['ShmemPmdMappe 287 vma.stats['FilePmdMapped 288 stats['anon']['aligned'][PMD_ORDER] = max( 289 stats['file']['aligned'][PMD_ORDER] = max( 290 291 rstats = { 292 f"anon-thp-pmd-aligned-{odkb(PMD_ORDER 293 f"file-thp-pmd-aligned-{odkb(PMD_ORDER 294 } 295 296 def flatten_sub(type, subtype, stats): 297 param = f"{type}-thp-pte-{subtype}-{{} 298 for od, nr in enumerate(stats[2:], 2): 299 rstats[param.format(odkb(od))] = { 300 301 def flatten_type(type, stats): 302 flatten_sub(type, 'aligned', stats['al 303 flatten_sub(type, 'unaligned', stats[' 304 rstats[f"{type}-thp-pte-partial"] = {' 305 306 flatten_type('anon', stats['anon']) 307 flatten_type('file', stats['file']) 308 309 return rstats 310 311 312 def cont_parse(vma, order, ranges, anons, head 313 # Given 4 same-sized arrays representing a 314 # by THPs (vfns: virtual frame numbers, pf 315 # True if page is anonymous, heads: True i 316 # dictionary of statistics describing the 317 nr_cont = 1 << order 318 nr_anon = 0 319 nr_file = 0 320 321 for rindex, rvfn, rpfn in zip(*ranges): 322 index_next = int(rindex[0]) 323 index_end = int(rindex[1]) + 1 324 vfn_start = int(rvfn[0]) 325 pfn_start = int(rpfn[0]) 326 327 if align_offset(pfn_start, nr_cont) != 328 continue 329 330 off = align_forward(vfn_start, nr_cont 331 index_next += off 332 333 while index_next + nr_cont <= index_en 334 folio_boundary = heads[index_next+ 335 if not folio_boundary: 336 if anons[index_next]: 337 nr_anon += nr_cont 338 else: 339 nr_file += nr_cont 340 index_next += nr_cont 341 342 # Account blocks that are PMD-mapped spear 343 # There is a race between acquiring the sm 344 # where memory could be deallocated. So cl 345 # gone negative. 346 anon_pmd_mapped = vma.stats['AnonHugePages 347 file_pmd_mapped = vma.stats['ShmemPmdMappe 348 vma.stats['FilePmdMapped'] 349 nr_anon = max(0, nr_anon - kbnr(anon_pmd_m 350 nr_file = max(0, nr_file - kbnr(file_pmd_m 351 352 rstats = { 353 f"anon-cont-pmd-aligned-{nrkb(nr_cont) 354 f"file-cont-pmd-aligned-{nrkb(nr_cont) 355 } 356 357 rstats[f"anon-cont-pte-aligned-{nrkb(nr_co 358 rstats[f"file-cont-pte-aligned-{nrkb(nr_co 359 360 return rstats 361 362 363 def vma_print(vma, pid): 364 # Prints a VMA instance in a format simila 365 # that the pid is included as the first va 366 print("{:010d}: {:016x}-{:016x} {}{}{}{} { 367 .format( 368 pid, vma.start, vma.end, 369 'r' if vma.read else '-', 'w' if v 370 'x' if vma.execute else '-', 'p' i 371 vma.pgoff, vma.major, vma.minor, v 372 )) 373 374 375 def stats_print(stats, tot_anon, tot_file, inc 376 # Print a statistics dictionary. 377 label_field = 32 378 for label, stat in stats.items(): 379 type = stat['type'] 380 value = stat['value'] 381 if value or inc_empty: 382 pad = max(0, label_field - len(lab 383 if type == 'anon' and tot_anon > 0 384 percent = f' ({value / tot_ano 385 elif type == 'file' and tot_file > 386 percent = f' ({value / tot_fil 387 else: 388 percent = '' 389 print(f"{label}:{' ' * pad}{value: 390 391 392 def vma_parse(vma, pagemap, kpageflags, contor 393 # Generate thp and cont statistics for a s 394 start = vma.start >> PAGE_SHIFT 395 end = vma.end >> PAGE_SHIFT 396 397 pmes = pagemap.get(start, end - start) 398 present = pmes & PM_PAGE_PRESENT != 0 399 pfns = pmes & PM_PFN_MASK 400 pfns = pfns[present] 401 vfns = np.arange(start, end, dtype=np.uint 402 vfns = vfns[present] 403 404 pfn_vec = cont_ranges_all([pfns], [pfns])[ 405 flags = kpageflags.getv(pfn_vec) 406 anons = flags & KPF_ANON != 0 407 heads = flags & KPF_COMPOUND_HEAD != 0 408 thps = flags & KPF_THP != 0 409 410 vfns = vfns[thps] 411 pfns = pfns[thps] 412 anons = anons[thps] 413 heads = heads[thps] 414 415 indexes = np.arange(len(vfns), dtype=np.ui 416 ranges = cont_ranges_all([vfns, pfns], [in 417 418 thpstats = thp_parse(vma, kpageflags, rang 419 contstats = [cont_parse(vma, order, ranges 420 421 tot_anon = vma.stats['Anonymous']['value'] 422 tot_file = vma.stats['Rss']['value'] - tot 423 424 return { 425 **thpstats, 426 **{k: v for s in contstats for k, v in 427 }, tot_anon, tot_file 428 429 430 def do_main(args): 431 pids = set() 432 rollup = {} 433 rollup_anon = 0 434 rollup_file = 0 435 436 if args.cgroup: 437 strict = False 438 for walk_info in os.walk(args.cgroup): 439 cgroup = walk_info[0] 440 with open(f'{cgroup}/cgroup.procs' 441 for line in pidfile.readlines( 442 pids.add(int(line.strip()) 443 elif args.pid: 444 strict = True 445 pids = pids.union(args.pid) 446 else: 447 strict = False 448 for pid in os.listdir('/proc'): 449 if pid.isdigit(): 450 pids.add(int(pid)) 451 452 if not args.rollup: 453 print(" PID START 454 455 for pid in pids: 456 try: 457 with PageMap(pid) as pagemap: 458 with KPageFlags() as kpageflag 459 for vma in VMAList(pid, vm 460 if (vma.read or vma.wr 461 stats, vma_anon, v 462 else: 463 stats = {} 464 vma_anon = 0 465 vma_file = 0 466 if args.inc_smaps: 467 stats = {**vma.sta 468 if args.rollup: 469 for k, v in stats. 470 if k in rollup 471 assert(rol 472 rollup[k][ 473 else: 474 rollup[k] 475 rollup_anon += vma 476 rollup_file += vma 477 else: 478 vma_print(vma, pid 479 stats_print(stats, 480 except (FileNotFoundError, ProcessLook 481 if strict: 482 raise 483 484 if args.rollup: 485 stats_print(rollup, rollup_anon, rollu 486 487 488 def main(): 489 docs_width = shutil.get_terminal_size().co 490 docs_width -= 2 491 docs_width = min(80, docs_width) 492 493 def format(string): 494 text = re.sub(r'\s+', ' ', string) 495 text = re.sub(r'\s*\\n\s*', '\n', text 496 paras = text.split('\n') 497 paras = [textwrap.fill(p, width=docs_w 498 return '\n'.join(paras) 499 500 def formatter(prog): 501 return argparse.RawDescriptionHelpForm 502 503 def size2order(human): 504 units = { 505 "K": 2**10, "M": 2**20, "G": 2**30 506 "k": 2**10, "m": 2**20, "g": 2**30 507 } 508 unit = 1 509 if human[-1] in units: 510 unit = units[human[-1]] 511 human = human[:-1] 512 try: 513 size = int(human) 514 except ValueError: 515 raise ArgException('error: --cont 516 size *= unit 517 order = int(math.log2(size / PAGE_SIZE 518 if order < 1: 519 raise ArgException('error: --cont 520 if (1 << order) * PAGE_SIZE != size: 521 raise ArgException('error: --cont 522 if order > PMD_ORDER: 523 raise ArgException('error: --cont 524 return order 525 526 parser = argparse.ArgumentParser(formatter 527 description=format("""Prints informati 528 pages are mapped, either s 529 process or cgroup.\\n 530 \\n 531 When run with --pid, the u 532 of pids to scan. e.g. "--p 533 with --cgroup, the user pa 534 all pids that belong to th 535 run with neither --pid nor 536 the system is gathered fro 537 had provided "--pid 1 --pi 538 \\n 539 A default set of statistic 540 mappings. However, it is a 541 additional statistics for 542 the block size is user-def 543 \\n 544 Statistics are maintained 545 file-backed (pagecache) me 546 as a percentage of either 547 file-backed memory as appr 548 \\n 549 THP Statistics\\n 550 --------------\\n 551 \\n 552 Statistics are always gene 553 contiguously-mapped THPs w 554 their size, for each <size 555 Separate counters describe 556 mapped by PMD. (Although n 557 PMD if it is PMD-sized):\\ 558 \\n 559 - anon-thp-pte-aligned-<si 560 - file-thp-pte-aligned-<si 561 - anon-thp-pmd-aligned-<si 562 - file-thp-pmd-aligned-<si 563 \\n 564 Similarly, statistics are 565 contiguously-mapped THPs w 566 aligned to their size, for 567 system. Due to the unalign 568 map by PMD, so there are o 569 \\n 570 - anon-thp-pte-unaligned-< 571 - file-thp-pte-unaligned-< 572 \\n 573 Statistics are also always 574 belong to a THP but where 575 contiguously- mapped. Thes 576 counted in the same counte 577 THP that is partially mapp 578 \\n 579 - anon-thp-pte-partial\\n 580 - file-thp-pte-partial\\n 581 \\n 582 Contiguous Block Statistic 583 -------------------------- 584 \\n 585 An optional, additional se 586 every contiguous block siz 587 These statistics show how 588 contiguous blocks of <size 589 given contiguous block mus 590 there is no requirement fo 591 Separate counters describe 592 vs those mapped by PMD:\\n 593 \\n 594 - anon-cont-pte-aligned-<s 595 - file-cont-pte-aligned-<s 596 - anon-cont-pmd-aligned-<s 597 - file-cont-pmd-aligned-<s 598 \\n 599 As an example, if monitori 600 64K), there are a number o 601 blocks: a fully- and conti 602 aligned to a 64K boundary 603 and contiguously-mapped 12 604 a 64K boundary would provi 605 maps its first 100K, but c 606 boundary would provide 1 b 607 contiguously-mapped 2M THP 608 are many other possible pe 609 epilog=format("""Requires root privile 610 kpageflags.""")) 611 612 group = parser.add_mutually_exclusive_grou 613 group.add_argument('--pid', 614 metavar='pid', required=False, type=in 615 help="""Process id of the target proce 616 scan multiple processes. --pid and 617 If neither are provided, all proce 618 system-wide information.""") 619 620 group.add_argument('--cgroup', 621 metavar='path', required=False, 622 help="""Path to the target cgroup in s 623 the cgroup and its children. --pid 624 exclusive. If neither are provided 625 provide system-wide information."" 626 627 parser.add_argument('--rollup', 628 required=False, default=False, action= 629 help="""Sum the per-vma statistics to 630 system, process or cgroup.""") 631 632 parser.add_argument('--cont', 633 metavar='size[KMG]', required=False, d 634 help="""Adds stats for memory that is 635 <size> and also aligned to <size>. 636 track multiple sized blocks. Usefu 637 hpa mappings. Size must be a power 638 639 parser.add_argument('--inc-smaps', 640 required=False, default=False, action= 641 help="""Include all numerical, additiv 642 output.""") 643 644 parser.add_argument('--inc-empty', 645 required=False, default=False, action= 646 help="""Show all statistics including 647 648 parser.add_argument('--periodic', 649 metavar='sleep_ms', required=False, ty 650 help="""Run in a loop, polling every s 651 652 args = parser.parse_args() 653 654 try: 655 args.cont = [size2order(cont) for cont 656 except ArgException as e: 657 parser.print_usage() 658 raise 659 660 if args.periodic: 661 while True: 662 do_main(args) 663 print() 664 time.sleep(args.periodic / 1000) 665 else: 666 do_main(args) 667 668 669 if __name__ == "__main__": 670 try: 671 main() 672 except Exception as e: 673 prog = os.path.basename(sys.argv[0]) 674 print(f'{prog}: {e}') 675 exit(1)
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.