#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-only
# Copyright (C) 2024 ARM Ltd.
#
# Utility providing smaps-like output detailing transparent hugepage usage.
# For more info, run:
#   ./thpmaps --help
#
# Requires numpy:
#   pip3 install numpy


import argparse
import collections
import math
import os
import re
import resource
import shutil
import sys
import textwrap
import time
import numpy as np


# Base page geometry comes from the running system; the PMD size is published
# by the THP sysfs interface, so importing this module fails if THP is absent.
PAGE_SIZE = resource.getpagesize()
PAGE_SHIFT = int(math.log2(PAGE_SIZE))
with open('/sys/kernel/mm/transparent_hugepage/hpage_pmd_size') as f:
    PMD_SIZE = int(f.read())
PMD_ORDER = int(math.log2(PMD_SIZE / PAGE_SIZE))


def align_forward(v, a):
    # Round v up to the next multiple of a (a must be a power of 2).
    mask = a - 1
    return (v + mask) & ~mask


def align_offset(v, a):
    # Offset of v within its a-sized block (a must be a power of 2).
    mask = a - 1
    return v & mask


def kbnr(kb):
    # Convert KB to number of pages.
    nbytes = kb << 10
    return nbytes >> PAGE_SHIFT


def nrkb(nr):
    # Convert number of pages to KB.
    nbytes = nr << PAGE_SHIFT
    return nbytes >> 10


def odkb(order):
    # Convert page order to KB.
    nbytes = PAGE_SIZE << order
    return nbytes >> 10


def cont_ranges_all(search, index):
    # Given a list of arrays, find the ranges for which values are
    # monotonically incrementing (by exactly 1) in all arrays of search. For
    # every array in index, return an (N, 2) array holding that array's values
    # at the first and last position of each range. All arrays in search and
    # index must be the same size.
    size = len(search[0])

    # linked[i] is True when element i+1 directly continues element i in every
    # search array.
    linked = np.diff(search[0]) == 1
    for arr in search[1:]:
        linked &= np.diff(arr) == 1

    # Start by emitting every element twice (as both range start and range
    # end), then drop one copy for each neighbour it is linked to: interior
    # elements vanish, range endpoints appear once, and isolated elements
    # appear twice (start == end).
    repeats = np.full(size, 2)
    repeats[1:] -= linked
    repeats[:-1] -= linked
    return [np.repeat(arr, repeats).reshape(-1, 2) for arr in index]


class ArgException(Exception):
    # Raised for invalid command line argument values.
    pass


class FileIOException(Exception):
    # Raised when a binary /proc file cannot be read in full.
    pass
class BinArrayFile:
    # Base class used to read /proc/<pid>/pagemap and /proc/kpageflags into a
    # numpy array. Use inherited class in a with clause to ensure file is
    # closed when it goes out of scope.
    def __init__(self, filename, element_size):
        # filename is opened read-only straight away; element_size is the size
        # in bytes of one array entry in the file.
        self.element_size = element_size
        self.filename = filename
        self.fd = os.open(self.filename, os.O_RDONLY)

    def cleanup(self):
        # Close the underlying file descriptor.
        os.close(self.fd)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.cleanup()

    def _readin(self, offset, buffer):
        # Fill buffer with the bytes found at byte offset in the file. A short
        # read is reported as FileIOException rather than returning partial
        # data.
        length = os.preadv(self.fd, (buffer,), offset)
        if len(buffer) != length:
            raise FileIOException('error: {} failed to read {} bytes at {:x}'
                            .format(self.filename, len(buffer), offset))

    def _toarray(self, buf):
        # Reinterpret the raw bytes as uint64 entries (only 8-byte elements
        # are supported).
        assert(self.element_size == 8)
        return np.frombuffer(buf, dtype=np.uint64)

    def getv(self, vec):
        # Read several ranges of entries and return them concatenated in one
        # array. vec is an (N, 2) array of [first, last] element indexes, both
        # inclusive.
        #
        # Scale into a new array: the previous in-place `vec *= element_size`
        # silently rewrote the caller's indexes into byte offsets.
        byte_vec = vec * self.element_size
        offsets = byte_vec[:, 0]
        # +element_size because the last index of each range is inclusive.
        lengths = (np.diff(byte_vec) + self.element_size).reshape(len(byte_vec))
        buf = bytearray(int(np.sum(lengths)))
        view = memoryview(buf)
        pos = 0
        for offset, length in zip(offsets, lengths):
            offset = int(offset)
            length = int(length)
            # Read each range directly into its slot of the shared buffer.
            self._readin(offset, view[pos:pos+length])
            pos += length
        return self._toarray(buf)

    def get(self, index, nr=1):
        # Read nr consecutive entries starting at element index.
        offset = index * self.element_size
        length = nr * self.element_size
        buf = bytearray(length)
        self._readin(offset, buf)
        return self._toarray(buf)


# Fields of a pagemap entry used by this tool: bit 63 flags a present page and
# the low 55 bits are masked out as the page frame number.
PM_PAGE_PRESENT = 1 << 63
PM_PFN_MASK = (1 << 55) - 1

class PageMap(BinArrayFile):
    # Read ranges of a given pid's pagemap into a numpy array.
    def __init__(self, pid='self'):
        super().__init__(f'/proc/{pid}/pagemap', 8)


# Bits of a kpageflags entry used by this tool.
KPF_ANON = 1 << 12
KPF_COMPOUND_HEAD = 1 << 15
KPF_COMPOUND_TAIL = 1 << 16
KPF_THP = 1 << 22

class KPageFlags(BinArrayFile):
    # Read ranges of /proc/kpageflags into a numpy array.
    def __init__(self):
        super().__init__('/proc/kpageflags', 8)


# smaps fields collected per VMA when smaps output is included in the report.
vma_all_stats = {
    "Size",
    "Rss",
    "Pss",
    "Pss_Dirty",
    "Shared_Clean",
    "Shared_Dirty",
    "Private_Clean",
    "Private_Dirty",
    "Referenced",
    "Anonymous",
    "KSM",
    "LazyFree",
    "AnonHugePages",
    "ShmemPmdMapped",
    "FilePmdMapped",
    "Shared_Hugetlb",
    "Private_Hugetlb",
    "Swap",
    "SwapPss",
    "Locked",
}

# Minimum set of smaps fields the THP accounting itself relies on.
vma_min_stats = {
    "Rss",
    "Anonymous",
    "AnonHugePages",
    "ShmemPmdMapped",
    "FilePmdMapped",
}

# One mapping parsed from a smaps header line, plus its selected stats.
VMA = collections.namedtuple('VMA', [
    'name',
    'start',
    'end',
    'read',
    'write',
    'execute',
    'private',
    'pgoff',
    'major',
    'minor',
    'inode',
    'stats',
])
class VMAList:
    # A container for VMAs, parsed from /proc/<pid>/smaps. Iterate over the
    # instance to receive VMAs.
    def __init__(self, pid='self', stats=()):
        # stats is the collection of smaps field names (without the trailing
        # ':') to record for each VMA; every other field is ignored.
        self.vmas = []
        with open(f'/proc/{pid}/smaps', 'r') as file:
            for line in file:
                elements = line.split()
                if '-' in elements[0]:
                    # Header line of a new mapping:
                    #   start-end perms pgoff major:minor inode [name]
                    start, end = map(lambda x: int(x, 16), elements[0].split('-'))
                    major, minor = map(lambda x: int(x, 16), elements[3].split(':'))
                    # The name is everything after the 5th field and may
                    # itself contain spaces (e.g. a path with a space, or a
                    # " (deleted)" suffix), so split with a limit instead of
                    # requiring exactly 6 whitespace-separated fields.
                    header = line.split(maxsplit=5)
                    name = header[5].rstrip('\n') if len(header) > 5 else ''
                    self.vmas.append(VMA(
                        name=name,
                        start=start,
                        end=end,
                        read=elements[1][0] == 'r',
                        write=elements[1][1] == 'w',
                        execute=elements[1][2] == 'x',
                        private=elements[1][3] == 'p',
                        pgoff=int(elements[2], 16),
                        major=major,
                        minor=minor,
                        # NOTE(review): parsed as base 16 like the other
                        # fields; procfs appears to emit the inode in decimal.
                        # vma_print() formats it with :x so the digits
                        # round-trip, but confirm before using the numeric
                        # value for anything else.
                        inode=int(elements[4], 16),
                        stats={},
                    ))
                else:
                    # "Field:   value [kB]" line belonging to the most recent
                    # mapping; strip the trailing ':' from the field name.
                    param = elements[0][:-1]
                    if param in stats:
                        value = int(elements[1])
                        self.vmas[-1].stats[param] = {'type': None, 'value': value}

    def __iter__(self):
        yield from self.vmas
def thp_parse(vma, kpageflags, ranges, indexes, vfns, pfns, anons, heads):
    # Given 4 same-sized arrays representing a range within a page table backed
    # by THPs (vfns: virtual frame numbers, pfns: physical frame numbers, anons:
    # True if page is anonymous, heads: True if page is head of a THP), return a
    # dictionary of statistics describing the mapped THPs.
    #
    # ranges is the [index, vfn, pfn] list of (N, 2) arrays giving the first
    # and last (inclusive) entry of every run that is contiguous in both
    # virtual and physical space; indexes is the array of positions into the
    # per-page arrays. kpageflags is used to peek at the page following each
    # run. pfns is accepted for symmetry but not read here.
    #
    # The returned dictionary maps a label to {'type': 'anon'|'file',
    # 'value': <kB>}.
    stats = {
        'file': {
            'partial': 0,
            'aligned': [0] * (PMD_ORDER + 1),
            'unaligned': [0] * (PMD_ORDER + 1),
        },
        'anon': {
            'partial': 0,
            'aligned': [0] * (PMD_ORDER + 1),
            'unaligned': [0] * (PMD_ORDER + 1),
        },
    }

    for rindex, rpfn in zip(ranges[0], ranges[2]):
        index_next = int(rindex[0])
        index_end = int(rindex[1]) + 1
        pfn_end = int(rpfn[1]) + 1

        # Positions within this run at which a THP starts.
        folios = indexes[index_next:index_end][heads[index_next:index_end]]

        # Account pages for any partially mapped THP at the front. In that case,
        # the first page of the range is a tail.
        nr = (int(folios[0]) if len(folios) else index_end) - index_next
        stats['anon' if anons[index_next] else 'file']['partial'] += nr

        # Account pages for any partially mapped THP at the back. In that case,
        # the next page after the range is a tail.
        if len(folios):
            flags = int(kpageflags.get(pfn_end)[0])
            if flags & KPF_COMPOUND_TAIL:
                nr = index_end - int(folios[-1])
                folios = folios[:-1]
                index_end -= nr
                # index_end now points at the head of the partially mapped
                # THP, so classify using that page. (Looking at
                # index_end - 1 would inspect the page *before* the THP, and
                # wrap to the last array entry when the THP is at index 0.)
                stats['anon' if anons[index_end] else 'file']['partial'] += nr

        # Account fully mapped THPs in the middle of the range.
        if len(folios):
            # Size of each THP is the distance to the next head (or to the end
            # of the remaining range for the last one).
            last_nr = np.uint64(index_end - int(folios[-1]))
            folio_nrs = np.append(np.diff(folios), last_nr)
            folio_orders = np.log2(folio_nrs).astype(np.uint64)
            for index, order in zip(folios, folio_orders):
                index = int(index)
                order = int(order)
                nr = 1 << order
                vfn = int(vfns[index])
                align = 'aligned' if align_forward(vfn, nr) == vfn else 'unaligned'
                anon = 'anon' if anons[index] else 'file'
                stats[anon][align][order] += nr

    # Account PMD-mapped THPs separately, so filter out of the stats. There is a
    # race between acquiring the smaps stats and reading pagemap, where memory
    # could be deallocated. So clamp to zero in case it would have gone negative.
    anon_pmd_mapped = vma.stats['AnonHugePages']['value']
    file_pmd_mapped = vma.stats['ShmemPmdMapped']['value'] + \
                      vma.stats['FilePmdMapped']['value']
    stats['anon']['aligned'][PMD_ORDER] = max(0, stats['anon']['aligned'][PMD_ORDER] - kbnr(anon_pmd_mapped))
    stats['file']['aligned'][PMD_ORDER] = max(0, stats['file']['aligned'][PMD_ORDER] - kbnr(file_pmd_mapped))

    rstats = {
        f"anon-thp-pmd-aligned-{odkb(PMD_ORDER)}kB": {'type': 'anon', 'value': anon_pmd_mapped},
        f"file-thp-pmd-aligned-{odkb(PMD_ORDER)}kB": {'type': 'file', 'value': file_pmd_mapped},
    }

    def flatten_sub(kind, subtype, counts):
        # Emit one entry per order, starting at order 2; counts is indexed by
        # order and holds page counts.
        param = f"{kind}-thp-pte-{subtype}-{{}}kB"
        for od, nr in enumerate(counts[2:], 2):
            rstats[param.format(odkb(od))] = {'type': kind, 'value': nrkb(nr)}

    def flatten_type(kind, kind_stats):
        # Flatten the aligned, unaligned and partial counters of one memory
        # type into rstats.
        flatten_sub(kind, 'aligned', kind_stats['aligned'])
        flatten_sub(kind, 'unaligned', kind_stats['unaligned'])
        rstats[f"{kind}-thp-pte-partial"] = {'type': kind, 'value': nrkb(kind_stats['partial'])}

    flatten_type('anon', stats['anon'])
    flatten_type('file', stats['file'])

    return rstats
def cont_parse(vma, order, ranges, anons, heads):
    # Count the memory of a VMA that is mapped in naturally aligned contiguous
    # blocks of (1 << order) pages, each block lying within a single THP.
    #
    # ranges is the [index, vfn, pfn] list of (N, 2) arrays giving the first
    # and last (inclusive) entry of every run that is contiguous in both
    # virtual and physical space. anons and heads are per-page arrays (True if
    # page is anonymous / True if page is head of a THP) addressed by the
    # index values. Returns a dictionary of statistics describing the
    # contiguous blocks, each entry being {'type': 'anon'|'file',
    # 'value': <kB>}.
    nr_cont = 1 << order
    totals = {'anon': 0, 'file': 0}

    for rindex, rvfn, rpfn in zip(*ranges):
        vfn_start = int(rvfn[0])
        pfn_start = int(rpfn[0])

        # A block can only be aligned in both address spaces if the virtual
        # and physical starts share the same offset within a block.
        if align_offset(pfn_start, nr_cont) != align_offset(vfn_start, nr_cont):
            continue

        index_end = int(rindex[1]) + 1
        # Skip forward to the first block-aligned page of the run.
        cursor = int(rindex[0]) + (align_forward(vfn_start, nr_cont) - vfn_start)

        while cursor + nr_cont <= index_end:
            # A THP head anywhere but the first page means the block would
            # straddle two THPs, so it does not count.
            crosses_folio = heads[cursor+1:cursor+nr_cont].any()
            if not crosses_folio:
                totals['anon' if anons[cursor] else 'file'] += nr_cont
            cursor += nr_cont

    # Account blocks that are PMD-mapped separately, so filter out of the stats.
    # There is a race between acquiring the smaps stats and reading pagemap,
    # where memory could be deallocated. So clamp to zero in case it would have
    # gone negative.
    anon_pmd_mapped = vma.stats['AnonHugePages']['value']
    file_pmd_mapped = vma.stats['ShmemPmdMapped']['value'] + \
                      vma.stats['FilePmdMapped']['value']
    nr_anon = max(0, totals['anon'] - kbnr(anon_pmd_mapped))
    nr_file = max(0, totals['file'] - kbnr(file_pmd_mapped))

    size_kb = nrkb(nr_cont)
    return {
        f"anon-cont-pmd-aligned-{size_kb}kB": {'type': 'anon', 'value': anon_pmd_mapped},
        f"file-cont-pmd-aligned-{size_kb}kB": {'type': 'file', 'value': file_pmd_mapped},
        f"anon-cont-pte-aligned-{size_kb}kB": {'type': 'anon', 'value': nrkb(nr_anon)},
        f"file-cont-pte-aligned-{size_kb}kB": {'type': 'file', 'value': nrkb(nr_file)},
    }
def vma_print(vma, pid):
    # Prints a VMA instance in a format similar to smaps. The main difference is
    # that the pid is included as the first value.
    prot = (
        'r' if vma.read else '-',
        'w' if vma.write else '-',
        'x' if vma.execute else '-',
        'p' if vma.private else 's',
    )
    line = "{:010d}: {:016x}-{:016x} {}{}{}{} {:08x} {:02x}:{:02x} {:08x} {}".format(
        pid, vma.start, vma.end, *prot,
        vma.pgoff, vma.major, vma.minor, vma.inode, vma.name)
    print(line)


def stats_print(stats, tot_anon, tot_file, inc_empty):
    # Print a statistics dictionary, one "label: value kB" line per entry.
    # Entries typed 'anon' or 'file' also show their share of tot_anon or
    # tot_file when that total is positive. Zero-valued entries are skipped
    # unless inc_empty is set.
    label_field = 32
    totals = {'anon': tot_anon, 'file': tot_file}
    for label, stat in stats.items():
        kind = stat['type']
        value = stat['value']
        if not (value or inc_empty):
            continue
        total = totals.get(kind, 0)
        percent = f' ({value / total:3.0%})' if total > 0 else ''
        # Pad so values line up in a column (the -1 accounts for the ':').
        pad = max(0, label_field - len(label) - 1)
        print(f"{label}:{' ' * pad}{value:8} kB{percent}")
def vma_parse(vma, pagemap, kpageflags, contorders):
    # Generate thp and cont statistics for a single VMA. Returns a tuple of
    # (statistics dictionary, total anonymous kB, total file-backed kB), where
    # the totals come from the VMA's smaps stats.
    vfn_first = vma.start >> PAGE_SHIFT
    vfn_limit = vma.end >> PAGE_SHIFT

    # One pagemap entry per virtual page; keep only the pages that are
    # present, pairing each virtual frame number with its physical one.
    pmes = pagemap.get(vfn_first, vfn_limit - vfn_first)
    present = (pmes & PM_PAGE_PRESENT) != 0
    pfns = (pmes & PM_PFN_MASK)[present]
    vfns = np.arange(vfn_first, vfn_limit, dtype=np.uint64)[present]

    # Fetch the flags of every present page, reading kpageflags in runs of
    # consecutive pfns.
    pfn_vec = cont_ranges_all([pfns], [pfns])[0]
    flags = kpageflags.getv(pfn_vec)
    anons = (flags & KPF_ANON) != 0
    heads = (flags & KPF_COMPOUND_HEAD) != 0
    thps = (flags & KPF_THP) != 0

    # Only pages that belong to a THP are of interest from here on.
    vfns, pfns, anons, heads = vfns[thps], pfns[thps], anons[thps], heads[thps]

    # Runs of pages that are contiguous both virtually and physically,
    # described by position, vfn and pfn.
    indexes = np.arange(len(vfns), dtype=np.uint64)
    ranges = cont_ranges_all([vfns, pfns], [indexes, vfns, pfns])

    thpstats = thp_parse(vma, kpageflags, ranges, indexes, vfns, pfns, anons, heads)
    contstats = [cont_parse(vma, order, ranges, anons, heads) for order in contorders]

    tot_anon = vma.stats['Anonymous']['value']
    tot_file = vma.stats['Rss']['value'] - tot_anon

    # THP stats first, then each requested contiguous-block size in order.
    merged = dict(thpstats)
    for contstat in contstats:
        merged.update(contstat)
    return merged, tot_anon, tot_file
def do_main(args):
    # Scan the selected processes and print THP statistics, either per VMA or
    # rolled up into a single summary. Processes come from --cgroup, from
    # --pid, or (with neither) from every numeric entry in /proc.
    rollup = {}
    rollup_anon = 0
    rollup_file = 0

    # strict: only when the user named the pids explicitly is a process that
    # cannot be read treated as an error.
    if args.cgroup:
        strict = False
        pids = set()
        for cgroup, _dirs, _files in os.walk(args.cgroup):
            with open(f'{cgroup}/cgroup.procs') as pidfile:
                pids.update(int(line.strip()) for line in pidfile)
    elif args.pid:
        strict = True
        pids = set(args.pid)
    else:
        strict = False
        pids = {int(entry) for entry in os.listdir('/proc') if entry.isdigit()}

    if not args.rollup:
        # Column titles are right-aligned to the field widths used by
        # vma_print().
        print("       PID             START              END PROT   OFFSET   DEV    INODE OBJECT")

    wanted_stats = vma_all_stats if args.inc_smaps else vma_min_stats

    for pid in pids:
        try:
            with PageMap(pid) as pagemap, KPageFlags() as kpageflags:
                for vma in VMAList(pid, wanted_stats):
                    accessible = vma.read or vma.write or vma.execute
                    if accessible and vma.stats['Rss']['value'] > 0:
                        stats, vma_anon, vma_file = vma_parse(vma, pagemap, kpageflags, args.cont)
                    else:
                        # Nothing resident (or no access permitted): report
                        # the VMA without THP statistics.
                        stats, vma_anon, vma_file = {}, 0, 0
                    if args.inc_smaps:
                        stats = {**vma.stats, **stats}
                    if not args.rollup:
                        vma_print(vma, pid)
                        stats_print(stats, vma_anon, vma_file, args.inc_empty)
                        continue
                    # Rollup: accumulate every statistic across all VMAs.
                    for k, v in stats.items():
                        if k in rollup:
                            assert(rollup[k]['type'] == v['type'])
                            rollup[k]['value'] += v['value']
                        else:
                            rollup[k] = v
                    rollup_anon += vma_anon
                    rollup_file += vma_file
        except (FileNotFoundError, ProcessLookupError, FileIOException):
            # Processes may exit while being scanned; that is only an error
            # when the pid was requested explicitly.
            if strict:
                raise

    if args.rollup:
        stats_print(rollup, rollup_anon, rollup_file, args.inc_empty)
if args.rollup: 469 for k, v in stats. 469 for k, v in stats.items(): 470 if k in rollup 470 if k in rollup: 471 assert(rol 471 assert(rollup[k]['type'] == v['type']) 472 rollup[k][ 472 rollup[k]['value'] += v['value'] 473 else: 473 else: 474 rollup[k] 474 rollup[k] = v 475 rollup_anon += vma 475 rollup_anon += vma_anon 476 rollup_file += vma 476 rollup_file += vma_file 477 else: 477 else: 478 vma_print(vma, pid 478 vma_print(vma, pid) 479 stats_print(stats, 479 stats_print(stats, vma_anon, vma_file, args.inc_empty) 480 except (FileNotFoundError, ProcessLook 480 except (FileNotFoundError, ProcessLookupError, FileIOException): 481 if strict: 481 if strict: 482 raise 482 raise 483 483 484 if args.rollup: 484 if args.rollup: 485 stats_print(rollup, rollup_anon, rollu 485 stats_print(rollup, rollup_anon, rollup_file, args.inc_empty) 486 486 487 487 488 def main(): 488 def main(): 489 docs_width = shutil.get_terminal_size().co 489 docs_width = shutil.get_terminal_size().columns 490 docs_width -= 2 490 docs_width -= 2 491 docs_width = min(80, docs_width) 491 docs_width = min(80, docs_width) 492 492 493 def format(string): 493 def format(string): 494 text = re.sub(r'\s+', ' ', string) 494 text = re.sub(r'\s+', ' ', string) 495 text = re.sub(r'\s*\\n\s*', '\n', text 495 text = re.sub(r'\s*\\n\s*', '\n', text) 496 paras = text.split('\n') 496 paras = text.split('\n') 497 paras = [textwrap.fill(p, width=docs_w 497 paras = [textwrap.fill(p, width=docs_width) for p in paras] 498 return '\n'.join(paras) 498 return '\n'.join(paras) 499 499 500 def formatter(prog): 500 def formatter(prog): 501 return argparse.RawDescriptionHelpForm 501 return argparse.RawDescriptionHelpFormatter(prog, width=docs_width) 502 502 503 def size2order(human): 503 def size2order(human): 504 units = { 504 units = { 505 "K": 2**10, "M": 2**20, "G": 2**30 505 "K": 2**10, "M": 2**20, "G": 2**30, 506 "k": 2**10, "m": 2**20, "g": 2**30 506 "k": 2**10, "m": 2**20, "g": 2**30, 507 } 507 } 508 unit = 1 508 unit 
= 1 509 if human[-1] in units: 509 if human[-1] in units: 510 unit = units[human[-1]] 510 unit = units[human[-1]] 511 human = human[:-1] 511 human = human[:-1] 512 try: 512 try: 513 size = int(human) 513 size = int(human) 514 except ValueError: 514 except ValueError: 515 raise ArgException('error: --cont 515 raise ArgException('error: --cont value must be integer size with optional KMG unit') 516 size *= unit 516 size *= unit 517 order = int(math.log2(size / PAGE_SIZE 517 order = int(math.log2(size / PAGE_SIZE)) 518 if order < 1: 518 if order < 1: 519 raise ArgException('error: --cont 519 raise ArgException('error: --cont value must be size of at least 2 pages') 520 if (1 << order) * PAGE_SIZE != size: 520 if (1 << order) * PAGE_SIZE != size: 521 raise ArgException('error: --cont 521 raise ArgException('error: --cont value must be size of power-of-2 pages') 522 if order > PMD_ORDER: 522 if order > PMD_ORDER: 523 raise ArgException('error: --cont 523 raise ArgException('error: --cont value must be less than or equal to PMD order') 524 return order 524 return order 525 525 526 parser = argparse.ArgumentParser(formatter 526 parser = argparse.ArgumentParser(formatter_class=formatter, 527 description=format("""Prints informati 527 description=format("""Prints information about how transparent huge 528 pages are mapped, either s 528 pages are mapped, either system-wide, or for a specified 529 process or cgroup.\\n 529 process or cgroup.\\n 530 \\n 530 \\n 531 When run with --pid, the u 531 When run with --pid, the user explicitly specifies the set 532 of pids to scan. e.g. "--p 532 of pids to scan. e.g. "--pid 10 [--pid 134 ...]". When run 533 with --cgroup, the user pa 533 with --cgroup, the user passes either a v1 or v2 cgroup and 534 all pids that belong to th 534 all pids that belong to the cgroup subtree are scanned. 
When 535 run with neither --pid nor 535 run with neither --pid nor --cgroup, the full set of pids on 536 the system is gathered fro 536 the system is gathered from /proc and scanned as if the user 537 had provided "--pid 1 --pi 537 had provided "--pid 1 --pid 2 ...".\\n 538 \\n 538 \\n 539 A default set of statistic 539 A default set of statistics is always generated for THP 540 mappings. However, it is a 540 mappings. However, it is also possible to generate 541 additional statistics for 541 additional statistics for "contiguous block mappings" where 542 the block size is user-def 542 the block size is user-defined.\\n 543 \\n 543 \\n 544 Statistics are maintained 544 Statistics are maintained independently for anonymous and 545 file-backed (pagecache) me 545 file-backed (pagecache) memory and are shown both in kB and 546 as a percentage of either 546 as a percentage of either total anonymous or total 547 file-backed memory as appr 547 file-backed memory as appropriate.\\n 548 \\n 548 \\n 549 THP Statistics\\n 549 THP Statistics\\n 550 --------------\\n 550 --------------\\n 551 \\n 551 \\n 552 Statistics are always gene 552 Statistics are always generated for fully- and 553 contiguously-mapped THPs w 553 contiguously-mapped THPs whose mapping address is aligned to 554 their size, for each <size 554 their size, for each <size> supported by the system. 555 Separate counters describe 555 Separate counters describe THPs mapped by PTE vs those 556 mapped by PMD. (Although n 556 mapped by PMD. 
(Although note a THP can only be mapped by 557 PMD if it is PMD-sized):\\ 557 PMD if it is PMD-sized):\\n 558 \\n 558 \\n 559 - anon-thp-pte-aligned-<si 559 - anon-thp-pte-aligned-<size>kB\\n 560 - file-thp-pte-aligned-<si 560 - file-thp-pte-aligned-<size>kB\\n 561 - anon-thp-pmd-aligned-<si 561 - anon-thp-pmd-aligned-<size>kB\\n 562 - file-thp-pmd-aligned-<si 562 - file-thp-pmd-aligned-<size>kB\\n 563 \\n 563 \\n 564 Similarly, statistics are 564 Similarly, statistics are always generated for fully- and 565 contiguously-mapped THPs w 565 contiguously-mapped THPs whose mapping address is *not* 566 aligned to their size, for 566 aligned to their size, for each <size> supported by the 567 system. Due to the unalign 567 system. Due to the unaligned mapping, it is impossible to 568 map by PMD, so there are o 568 map by PMD, so there are only PTE counters for this case:\\n 569 \\n 569 \\n 570 - anon-thp-pte-unaligned-< 570 - anon-thp-pte-unaligned-<size>kB\\n 571 - file-thp-pte-unaligned-< 571 - file-thp-pte-unaligned-<size>kB\\n 572 \\n 572 \\n 573 Statistics are also always 573 Statistics are also always generated for mapped pages that 574 belong to a THP but where 574 belong to a THP but where the is THP is *not* fully- and 575 contiguously- mapped. Thes 575 contiguously- mapped. These "partial" mappings are all 576 counted in the same counte 576 counted in the same counter regardless of the size of the 577 THP that is partially mapp 577 THP that is partially mapped:\\n 578 \\n 578 \\n 579 - anon-thp-pte-partial\\n 579 - anon-thp-pte-partial\\n 580 - file-thp-pte-partial\\n 580 - file-thp-pte-partial\\n 581 \\n 581 \\n 582 Contiguous Block Statistic 582 Contiguous Block Statistics\\n 583 -------------------------- 583 ---------------------------\\n 584 \\n 584 \\n 585 An optional, additional se 585 An optional, additional set of statistics is generated for 586 every contiguous block siz 586 every contiguous block size specified with `--cont <size>`. 
587 These statistics show how 587 These statistics show how much memory is mapped in 588 contiguous blocks of <size 588 contiguous blocks of <size> and also aligned to <size>. A 589 given contiguous block mus 589 given contiguous block must all belong to the same THP, but 590 there is no requirement fo 590 there is no requirement for it to be the *whole* THP. 591 Separate counters describe 591 Separate counters describe contiguous blocks mapped by PTE 592 vs those mapped by PMD:\\n 592 vs those mapped by PMD:\\n 593 \\n 593 \\n 594 - anon-cont-pte-aligned-<s 594 - anon-cont-pte-aligned-<size>kB\\n 595 - file-cont-pte-aligned-<s 595 - file-cont-pte-aligned-<size>kB\\n 596 - anon-cont-pmd-aligned-<s 596 - anon-cont-pmd-aligned-<size>kB\\n 597 - file-cont-pmd-aligned-<s 597 - file-cont-pmd-aligned-<size>kB\\n 598 \\n 598 \\n 599 As an example, if monitori 599 As an example, if monitoring 64K contiguous blocks (--cont 600 64K), there are a number o 600 64K), there are a number of sources that could provide such 601 blocks: a fully- and conti 601 blocks: a fully- and contiguously-mapped 64K THP that is 602 aligned to a 64K boundary 602 aligned to a 64K boundary would provide 1 block. A fully- 603 and contiguously-mapped 12 603 and contiguously-mapped 128K THP that is aligned to at least 604 a 64K boundary would provi 604 a 64K boundary would provide 2 blocks. Or a 128K THP that 605 maps its first 100K, but c 605 maps its first 100K, but contiguously and starting at a 64K 606 boundary would provide 1 b 606 boundary would provide 1 block. A fully- and 607 contiguously-mapped 2M THP 607 contiguously-mapped 2M THP would provide 32 blocks. 
There 608 are many other possible pe 608 are many other possible permutations.\\n"""), 609 epilog=format("""Requires root privile 609 epilog=format("""Requires root privilege to access pagemap and 610 kpageflags.""")) 610 kpageflags.""")) 611 611 612 group = parser.add_mutually_exclusive_grou 612 group = parser.add_mutually_exclusive_group(required=False) 613 group.add_argument('--pid', 613 group.add_argument('--pid', 614 metavar='pid', required=False, type=in 614 metavar='pid', required=False, type=int, default=[], action='append', 615 help="""Process id of the target proce 615 help="""Process id of the target process. Maybe issued multiple times to 616 scan multiple processes. --pid and 616 scan multiple processes. --pid and --cgroup are mutually exclusive. 617 If neither are provided, all proce 617 If neither are provided, all processes are scanned to provide 618 system-wide information.""") 618 system-wide information.""") 619 619 620 group.add_argument('--cgroup', 620 group.add_argument('--cgroup', 621 metavar='path', required=False, 621 metavar='path', required=False, 622 help="""Path to the target cgroup in s 622 help="""Path to the target cgroup in sysfs. Iterates over every pid in 623 the cgroup and its children. --pid 623 the cgroup and its children. --pid and --cgroup are mutually 624 exclusive. If neither are provided 624 exclusive. 
If neither are provided, all processes are scanned to 625 provide system-wide information."" 625 provide system-wide information.""") 626 626 627 parser.add_argument('--rollup', 627 parser.add_argument('--rollup', 628 required=False, default=False, action= 628 required=False, default=False, action='store_true', 629 help="""Sum the per-vma statistics to 629 help="""Sum the per-vma statistics to provide a summary over the whole 630 system, process or cgroup.""") 630 system, process or cgroup.""") 631 631 632 parser.add_argument('--cont', 632 parser.add_argument('--cont', 633 metavar='size[KMG]', required=False, d 633 metavar='size[KMG]', required=False, default=[], action='append', 634 help="""Adds stats for memory that is 634 help="""Adds stats for memory that is mapped in contiguous blocks of 635 <size> and also aligned to <size>. 635 <size> and also aligned to <size>. May be issued multiple times to 636 track multiple sized blocks. Usefu 636 track multiple sized blocks. Useful to infer e.g. arm64 contpte and 637 hpa mappings. Size must be a power 637 hpa mappings. 
Size must be a power-of-2 number of pages.""") 638 638 639 parser.add_argument('--inc-smaps', 639 parser.add_argument('--inc-smaps', 640 required=False, default=False, action= 640 required=False, default=False, action='store_true', 641 help="""Include all numerical, additiv 641 help="""Include all numerical, additive /proc/<pid>/smaps stats in the 642 output.""") 642 output.""") 643 643 644 parser.add_argument('--inc-empty', 644 parser.add_argument('--inc-empty', 645 required=False, default=False, action= 645 required=False, default=False, action='store_true', 646 help="""Show all statistics including 646 help="""Show all statistics including those whose value is 0.""") 647 647 648 parser.add_argument('--periodic', 648 parser.add_argument('--periodic', 649 metavar='sleep_ms', required=False, ty 649 metavar='sleep_ms', required=False, type=int, 650 help="""Run in a loop, polling every s 650 help="""Run in a loop, polling every sleep_ms milliseconds.""") 651 651 652 args = parser.parse_args() 652 args = parser.parse_args() 653 653 654 try: 654 try: 655 args.cont = [size2order(cont) for cont 655 args.cont = [size2order(cont) for cont in args.cont] 656 except ArgException as e: 656 except ArgException as e: 657 parser.print_usage() 657 parser.print_usage() 658 raise 658 raise 659 659 660 if args.periodic: 660 if args.periodic: 661 while True: 661 while True: 662 do_main(args) 662 do_main(args) 663 print() 663 print() 664 time.sleep(args.periodic / 1000) 664 time.sleep(args.periodic / 1000) 665 else: 665 else: 666 do_main(args) 666 do_main(args) 667 667 668 668 669 if __name__ == "__main__": 669 if __name__ == "__main__": 670 try: 670 try: 671 main() 671 main() 672 except Exception as e: 672 except Exception as e: 673 prog = os.path.basename(sys.argv[0]) 673 prog = os.path.basename(sys.argv[0]) 674 print(f'{prog}: {e}') 674 print(f'{prog}: {e}') 675 exit(1) 675 exit(1)
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.