#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (C) Google LLC, 2018
#
# Author: Tom Roeder <tmroeder@google.com>
#
"""A tool for generating compile_commands.json in the Linux kernel."""

import argparse
import json
import logging
import os
import re
import subprocess
import sys

_DEFAULT_OUTPUT = 'compile_commands.json'
_DEFAULT_LOG_LEVEL = 'WARNING'

# Kbuild stores the command used to build "foo.o" in a hidden ".foo.o.cmd".
_FILENAME_PATTERN = r'^\..*\.cmd$'
# First line of a .cmd file: "[saved]cmd_<obj>.o := <command> <source>.[cS]".
_LINE_PATTERN = r'^(saved)?cmd_[^ ]*\.o := (?P<command_prefix>.* )(?P<file_path>[^ ]*\.[cS]) *(;|$)'
_VALID_LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
# The tools/ directory adopts a different build system, and produces .cmd
# files in a different format. Do not support it.
_EXCLUDE_DIRS = ['.git', 'Documentation', 'include', 'tools']


def parse_arguments():
    """Sets up and parses command-line arguments.

    Returns:
        log_level: A logging level to filter log output.
        directory: The work directory where the objects were built
            (canonicalized with os.path.realpath).
        output: Where to write the compile-commands JSON file.
        ar: Command used for parsing .a archives.
        paths: The list of files/directories to handle to find .cmd files.
            Falls back to the (non-canonicalized) --directory value when no
            positional path is given.
    """
    usage = 'Creates a compile_commands.json database from kernel .cmd files'
    parser = argparse.ArgumentParser(description=usage)

    directory_help = ('specify the output directory used for the kernel build '
                      '(defaults to the working directory)')
    parser.add_argument('-d', '--directory', type=str, default='.',
                        help=directory_help)

    output_help = ('path to the output command database (defaults to ' +
                   _DEFAULT_OUTPUT + ')')
    parser.add_argument('-o', '--output', type=str, default=_DEFAULT_OUTPUT,
                        help=output_help)

    log_level_help = ('the level of log messages to produce (defaults to ' +
                      _DEFAULT_LOG_LEVEL + ')')
    parser.add_argument('--log_level', choices=_VALID_LOG_LEVELS,
                        default=_DEFAULT_LOG_LEVEL, help=log_level_help)

    ar_help = 'command used for parsing .a archives'
    parser.add_argument('-a', '--ar', type=str, default='llvm-ar', help=ar_help)

    paths_help = ('directories to search or files to parse '
                  '(files should be *.o, *.a, or modules.order). '
                  'If nothing is specified, the current directory is searched')
    parser.add_argument('paths', type=str, nargs='*', help=paths_help)

    args = parser.parse_args()

    return (args.log_level,
            os.path.realpath(args.directory),
            args.output,
            args.ar,
            args.paths or [args.directory])


def cmdfiles_in_dir(directory):
    """Generate the iterator of .cmd files found under the directory.

    Walk under the given directory, and yield every .cmd file found.
    The sub-directories named in _EXCLUDE_DIRS directly below `directory`
    are skipped entirely.

    Args:
        directory: The directory to search for .cmd files.

    Yields:
        The path to a .cmd file.
    """

    filename_matcher = re.compile(_FILENAME_PATTERN)
    # Only membership is tested, so a set is the natural container.
    exclude_dirs = {os.path.join(directory, d) for d in _EXCLUDE_DIRS}

    for dirpath, dirnames, filenames in os.walk(directory, topdown=True):
        # Prune unwanted directories. With topdown=True, emptying `dirnames`
        # in place stops os.walk() from descending any further.
        if dirpath in exclude_dirs:
            dirnames[:] = []
            continue

        for filename in filenames:
            if filename_matcher.match(filename):
                yield os.path.join(dirpath, filename)


def to_cmdfile(path):
    """Return the path of .cmd file used for the given build artifact

    Args:
        path: file path

    Returns:
        The path to .cmd file
    """
    dirname, base = os.path.split(path)
    return os.path.join(dirname, '.' + base + '.cmd')


def cmdfiles_for_a(archive, ar):
    """Generate the iterator of .cmd files associated with the archive.

    Parse the given archive, and yield every .cmd file used to build it.

    Args:
        archive: The archive to parse
        ar: Command used for listing the archive members (run as
            `<ar> -t <archive>`)

    Yields:
        The path to every .cmd file found

    Raises:
        subprocess.CalledProcessError: The `ar` command exited non-zero.
    """
    # The member list is split on any whitespace, so member names are
    # assumed not to contain spaces.
    for obj in subprocess.check_output([ar, '-t', archive]).decode().split():
        yield to_cmdfile(obj)


def cmdfiles_for_modorder(modorder):
    """Generate the iterator of .cmd files associated with the modules.order.

    Parse the given modules.order, and yield every .cmd file used to build the
    contained modules.

    Args:
        modorder: The modules.order file to parse

    Yields:
        The path to every .cmd file found

    Raises:
        SystemExit: A line of modules.order does not end with '.o'.
    """
    with open(modorder) as f:
        for line in f:
            obj = line.rstrip()
            base, ext = os.path.splitext(obj)
            if ext != '.o':
                sys.exit('{}: module path must end with .o'.format(obj))
            mod = base + '.mod'
            # Read from *.mod, to get a list of objects that compose the module.
            with open(mod) as m:
                for mod_line in m:
                    yield to_cmdfile(mod_line.rstrip())


def process_line(root_directory, command_prefix, file_path):
    """Extracts information from a .cmd line and creates an entry from it.

    Args:
        root_directory: The directory that was searched for .cmd files. Usually
            used directly in the "directory" entry in compile_commands.json.
        command_prefix: The extracted command line, up to the last element.
        file_path: The .c file from the end of the extracted command.
            Usually relative to root_directory, but sometimes absolute.

    Returns:
        An entry to append to compile_commands.

    Raises:
        ValueError: Could not find the extracted file based on file_path and
            root_directory.
    """
    # The .cmd files are intended to be included directly by Make, so they
    # escape the pound sign '#', either as '\#' or '$(pound)' (depending on the
    # kernel version). The compile_commands.json file is not interpreted
    # by Make, so this code replaces the escaped version with '#'.
    prefix = command_prefix.replace(r'\#', '#').replace('$(pound)', '#')

    # Return the canonical path, eliminating any symbolic links encountered in
    # the path. os.path.join() discards root_directory when file_path is
    # already absolute.
    abs_path = os.path.realpath(os.path.join(root_directory, file_path))
    if not os.path.exists(abs_path):
        raise ValueError('File %s not found' % abs_path)
    return {
        'directory': root_directory,
        'file': abs_path,
        'command': prefix + file_path,
    }


def main():
    """Walks through the directory and finds and parses .cmd files."""
    log_level, directory, output, ar, paths = parse_arguments()

    level = getattr(logging, log_level)
    logging.basicConfig(format='%(levelname)s: %(message)s', level=level)

    line_matcher = re.compile(_LINE_PATTERN)

    compile_commands = []

    for path in paths:
        # If 'path' is a directory, handle all .cmd files under it.
        # Otherwise, handle .cmd files associated with the file.
        # built-in objects are linked via vmlinux.a
        # Modules are listed in modules.order.
        if os.path.isdir(path):
            cmdfiles = cmdfiles_in_dir(path)
        elif path.endswith('.a'):
            cmdfiles = cmdfiles_for_a(path, ar)
        elif path.endswith('modules.order'):
            cmdfiles = cmdfiles_for_modorder(path)
        else:
            sys.exit('{}: unknown file type'.format(path))

        for cmdfile in cmdfiles:
            # Only the first line of a .cmd file carries the build command.
            with open(cmdfile, 'rt') as f:
                result = line_matcher.match(f.readline())
                if result:
                    try:
                        entry = process_line(directory, result.group('command_prefix'),
                                             result.group('file_path'))
                        compile_commands.append(entry)
                    except ValueError as err:
                        # A source file that no longer exists is skipped, not fatal.
                        logging.info('Could not add line from %s: %s',
                                     cmdfile, err)

    # Sort by file so the generated database is stable across runs.
    with open(output, 'wt') as f:
        json.dump(sorted(compile_commands, key=lambda x: x["file"]), f, indent=2, sort_keys=True)


if __name__ == '__main__':
    main()
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.