/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Lexical analysis for genksyms.
 * Copyright 1996, 1997 Linux International.
 *
 * New implementation contributed by Richard Henderson <rth@tamu.edu>
 * Based on original work by Bjorn Ekwall <bj0rn@blox.se>
 *
 * Taken from Linux modutils 2.4.22.
 */

%{

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "genksyms.h"
#include "parse.tab.h"

/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.  */
#define YY_DECL         static int yylex1(void)

%}

IDENT                   [A-Za-z_\$][A-Za-z0-9_\$]*

O_INT                   0[0-7]*
D_INT                   [1-9][0-9]*
X_INT                   0[Xx][0-9A-Fa-f]+
I_SUF                   [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT                     ({O_INT}|{D_INT}|{X_INT}){I_SUF}?

FRAC                    ([0-9]*\.[0-9]+)|([0-9]+\.)
EXP                     [Ee][+-]?[0-9]+
F_SUF                   [FfLl]
REAL                    ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

STRING                  L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR                    L?\'([^\\\']*\\.)*[^\\\']*\'

MC_TOKEN                ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* We don't do multiple input files.  */
%option noyywrap

%option noinput

%%


 /* Keep track of our location in the original source files.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n     return FILENAME;
^#.*\n                                  cur_line++;
\n                                      cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+                            ;


{STRING}                                return STRING;
{CHAR}                                  return CHAR;
{IDENT}                                 return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
{MC_TOKEN}                              return OTHER;
{INT}                                   return INT;
{REAL}                                  return REAL;

"..."                                   return DOTS;

 /* All other tokens are single characters.  */
.                                       return yytext[0];


%%

/* Bring in the keyword recognizer.  */

#include "keywords.c"


/*
 * Macros to append to our phrase collection list.
 *
 * _APP(T, L) stores a copy of the L-character, NUL-terminated text T in the
 * node currently pointed to by next_node, then allocates a fresh next_node
 * whose ->next links back to the node just filled.  It expects the locals
 * cur_node and next_node of yylex() to be in scope.
 *
 * We mark any token that equals a known enumerator as SYM_ENUM_CONST.
 * The parser will change this for struct and union tags later,
 * the only problem is struct and union members:
 *    enum e { a, b }; struct s { int a, b; }
 * but in this case, the only effect will be, that the ABI checksums become
 * more volatile, which is acceptable. Also, such collisions are quite rare,
 * so far it was only observed in include/linux/telephony.h.
 */
#define _APP(T,L)       do {                                               \
                          cur_node = next_node;                            \
                          next_node = xmalloc(sizeof(*next_node));         \
                          next_node->next = cur_node;                      \
                          cur_node->string =                               \
                            memcpy(xmalloc((L) + 1), (T), (L) + 1);        \
                          cur_node->tag =                                  \
                            find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\
                            SYM_ENUM_CONST : SYM_NORMAL ;                  \
                          cur_node->in_source_file = in_source_file;       \
                        } while (0)

/* Append the token flex has just matched.  */
#define APP             _APP(yytext, yyleng)


/*
 * The second stage lexer.  Here we incorporate knowledge of the state
 * of the parser to tailor the tokens that are returned.
 *
 * Returns 0 when yylex1() reports end of input; otherwise returns the
 * (possibly re-categorized) token and points yylval at &next_node->next.
 * FILENAME tokens are consumed here: they update cur_filename, cur_line and
 * in_source_file and are never returned to the caller.
 *
 * In the phrase-collecting states (attribute, asm, typeof, bracket, brace,
 * expression, static assert) the tokens are appended to the list and a
 * single *_PHRASE token is returned once the phrase is complete.
 */
int
yylex(void)
{
  static enum {
    ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
    ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT,
  } lexstate = ST_NOTSTARTED;

  static int suppress_type_lookup, dont_want_brace_phrase;
  static struct string_list *next_node;
  static char *source_file;

  /* count is the bracket nesting depth; it restarts at 0 on every call.  */
  int token, count = 0;
  struct string_list *cur_node;

  if (lexstate == ST_NOTSTARTED)
    {
      /* First call: create the initial empty list node.  */
      next_node = xmalloc(sizeof(*next_node));
      next_node->next = NULL;
      lexstate = ST_NORMAL;
    }

repeat:
  token = yylex1();

  if (token == 0)
    return 0;
  else if (token == FILENAME)
    {
      char *file, *e;

      /* Save the filename and line number for later error messages.  */

      /* free(NULL) is a no-op, so no guard is needed.  */
      free(cur_filename);

      /* The FILENAME rule guarantees a pair of double quotes in yytext.  */
      file = strchr(yytext, '\"')+1;
      e = strchr(file, '\"');
      *e = '\0';
      cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
      cur_line = atoi(yytext+2);

      /* The first file name seen is remembered as the source file.  */
      if (!source_file) {
        source_file = xstrdup(cur_filename);
        in_source_file = 1;
      } else {
        in_source_file = (strcmp(cur_filename, source_file) == 0);
      }

      goto repeat;
    }

  switch (lexstate)
    {
    case ST_NORMAL:
      switch (token)
        {
        case IDENT:
          APP;
          {
            int r = is_reserved_word(yytext, yyleng);
            if (r >= 0)
              {
                token = r;
                switch (token)
                  {
                  case ATTRIBUTE_KEYW:
                    lexstate = ST_ATTRIBUTE;
                    count = 0;
                    goto repeat;
                  case ASM_KEYW:
                    lexstate = ST_ASM;
                    count = 0;
                    goto repeat;
                  case TYPEOF_KEYW:
                    lexstate = ST_TYPEOF;
                    count = 0;
                    goto repeat;

                  case STRUCT_KEYW:
                  case UNION_KEYW:
                  case ENUM_KEYW:
                    /* Both counters are decremented once per returned
                       token at fini.  */
                    dont_want_brace_phrase = 3;
                    suppress_type_lookup = 2;
                    goto fini;

                  case EXPORT_SYMBOL_KEYW:
                    goto fini;

                  case STATIC_ASSERT_KEYW:
                    lexstate = ST_STATIC_ASSERT;
                    count = 0;
                    goto repeat;
                  }
              }
            if (!suppress_type_lookup)
              {
                if (find_symbol(yytext, SYM_TYPEDEF, 1))
                  token = TYPE;
              }
          }
          break;

        case '[':
          APP;
          lexstate = ST_BRACKET;
          count = 1;
          goto repeat;

        case '{':
          APP;
          if (dont_want_brace_phrase)
            break;
          lexstate = ST_BRACE;
          count = 1;
          goto repeat;

        case '=': case ':':
          APP;
          /* The following calls collect an expression phrase.  */
          lexstate = ST_EXPRESSION;
          break;

        default:
          APP;
          break;
        }
      break;

    case ST_ATTRIBUTE:
      /* Collect everything up to the matching ')'.  */
      APP;
      switch (token)
        {
        case '(':
          ++count;
          goto repeat;
        case ')':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = ATTRIBUTE_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_ASM:
      /* Collect everything up to the matching ')'.  */
      APP;
      switch (token)
        {
        case '(':
          ++count;
          goto repeat;
        case ')':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = ASM_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_TYPEOF_1:
      /* First token after the opening '(' of a typeof.  */
      if (token == IDENT)
        {
          if (is_reserved_word(yytext, yyleng) >= 0
              || find_symbol(yytext, SYM_TYPEDEF, 1))
            {
              /* Push the identifier and the '(' back onto the input and
                 return the keyword itself, leaving the rest to ST_NORMAL.  */
              yyless(0);
              unput('(');
              lexstate = ST_NORMAL;
              token = TYPEOF_KEYW;
              break;
            }
          _APP("(", 1);
        }
      /* NOTE(review): when the token is not an IDENT the opening "(" is
         not appended to the phrase -- confirm this is intended.  */
      lexstate = ST_TYPEOF;
      /* FALLTHRU */

    case ST_TYPEOF:
      switch (token)
        {
        case '(':
          if ( ++count == 1 )
            lexstate = ST_TYPEOF_1;
          else
            APP;
          goto repeat;
        case ')':
          APP;
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = TYPEOF_PHRASE;
              break;
            }
          goto repeat;
        default:
          APP;
          goto repeat;
        }
      break;

    case ST_BRACKET:
      /* Collect everything up to the matching ']'.  */
      APP;
      switch (token)
        {
        case '[':
          ++count;
          goto repeat;
        case ']':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = BRACKET_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_BRACE:
      /* Collect everything up to the matching '}'.  */
      APP;
      switch (token)
        {
        case '{':
          ++count;
          goto repeat;
        case '}':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = BRACE_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_EXPRESSION:
      switch (token)
        {
        case '(': case '[': case '{':
          ++count;
          APP;
          goto repeat;
        case '}':
          /* is this the last line of an enum declaration? */
          if (count == 0)
            {
              /* Put back the token we just read so's we can find it again
                 after registering the expression.  */
              unput(token);

              lexstate = ST_NORMAL;
              token = EXPRESSION_PHRASE;
              break;
            }
          /* FALLTHRU */
        case ')': case ']':
          --count;
          APP;
          goto repeat;
        case ',': case ';':
          if (count == 0)
            {
              /* Put back the token we just read so's we can find it again
                 after registering the expression.  */
              unput(token);

              lexstate = ST_NORMAL;
              token = EXPRESSION_PHRASE;
              break;
            }
          APP;
          goto repeat;
        default:
          APP;
          goto repeat;
        }
      break;

    case ST_STATIC_ASSERT:
      /* Collect everything up to the matching ')'.  */
      APP;
      switch (token)
        {
        case '(':
          ++count;
          goto repeat;
        case ')':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = STATIC_ASSERT_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    default:
      /* Unknown lexer state.  */
      exit(1);
    }
fini:

  if (suppress_type_lookup > 0)
    --suppress_type_lookup;
  if (dont_want_brace_phrase > 0)
    --dont_want_brace_phrase;

  yylval = &next_node->next;

  return token;
}
/*
 * Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
 * TOMOYO® is a registered trademark of NTT DATA CORPORATION.
 */