1 /* SPDX-License-Identifier: GPL-2.0-or-later * 1 /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 /* 2 /* 3 * Lexical analysis for genksyms. 3 * Lexical analysis for genksyms. 4 * Copyright 1996, 1997 Linux International. 4 * Copyright 1996, 1997 Linux International. 5 * 5 * 6 * New implementation contributed by Richard H< 6 * New implementation contributed by Richard Henderson <rth@tamu.edu> 7 * Based on original work by Bjorn Ekwall <bj0r 7 * Based on original work by Bjorn Ekwall <bj0rn@blox.se> 8 * 8 * 9 * Taken from Linux modutils 2.4.22. 9 * Taken from Linux modutils 2.4.22. 10 */ 10 */ 11 11 12 %{ 12 %{ 13 13 14 #include <limits.h> 14 #include <limits.h> 15 #include <stdlib.h> 15 #include <stdlib.h> 16 #include <string.h> 16 #include <string.h> 17 #include <ctype.h> 17 #include <ctype.h> 18 18 19 #include "genksyms.h" 19 #include "genksyms.h" 20 #include "parse.tab.h" 20 #include "parse.tab.h" 21 21 22 /* We've got a two-level lexer here. We let f 22 /* We've got a two-level lexer here. We let flex do basic tokenization 23 and then we categorize those basic tokens i 23 and then we categorize those basic tokens in the second stage. */ 24 #define YY_DECL static int yylex1(void 24 #define YY_DECL static int yylex1(void) 25 25 26 %} 26 %} 27 27 28 IDENT [A-Za-z_\$][A-Za-z0-9_ 28 IDENT [A-Za-z_\$][A-Za-z0-9_\$]* 29 29 30 O_INT 0[0-7]* 30 O_INT 0[0-7]* 31 D_INT [1-9][0-9]* 31 D_INT [1-9][0-9]* 32 X_INT 0[Xx][0-9A-Fa-f]+ 32 X_INT 0[Xx][0-9A-Fa-f]+ 33 I_SUF [Uu]|[Ll]|[Uu][Ll]|[Ll 33 I_SUF [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu] 34 INT ({O_INT}|{D_INT}|{X_IN 34 INT ({O_INT}|{D_INT}|{X_INT}){I_SUF}? 35 35 36 FRAC ([0-9]*\.[0-9]+)|([0-9 36 FRAC ([0-9]*\.[0-9]+)|([0-9]+\.) 37 EXP [Ee][+-]?[0-9]+ 37 EXP [Ee][+-]?[0-9]+ 38 F_SUF [FfLl] 38 F_SUF [FfLl] 39 REAL ({FRAC}{EXP}?{F_SUF}?) 39 REAL ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?) 40 40 41 STRING L?\"([^\\\"]*\\.)*[^\\ 41 STRING L?\"([^\\\"]*\\.)*[^\\\"]*\" 42 CHAR L?\'([^\\\']*\\.)*[^\\ 42 CHAR L?\'([^\\\']*\\.)*[^\\\']*\' 43 43 44 MC_TOKEN ([~%^&*+=|<>/-]=)|(&&) 44 MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>) 45 45 46 /* We don't do multiple input files. */ 46 /* We don't do multiple input files. */ 47 %option noyywrap 47 %option noyywrap 48 48 49 %option noinput 49 %option noinput 50 50 51 %% 51 %% 52 52 53 53 54 /* Keep track of our location in the original 54 /* Keep track of our location in the original source files. */ 55 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return 55 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return FILENAME; 56 ^#.*\n cur_li 56 ^#.*\n cur_line++; 57 \n cur_li 57 \n cur_line++; 58 58 59 /* Ignore all other whitespace. */ 59 /* Ignore all other whitespace. */ 60 [ \t\f\v\r]+ ; 60 [ \t\f\v\r]+ ; 61 61 62 62 63 {STRING} return 63 {STRING} return STRING; 64 {CHAR} return 64 {CHAR} return CHAR; 65 {IDENT} return 65 {IDENT} return IDENT; 66 66 67 /* The Pedant requires that the other C multi 67 /* The Pedant requires that the other C multi-character tokens be 68 recognized as tokens. We don't actually u 68 recognized as tokens. We don't actually use them since we don't 69 parse expressions, but we do want whitespa 69 parse expressions, but we do want whitespace to be arranged 70 around them properly. */ 70 around them properly. */ 71 {MC_TOKEN} return 71 {MC_TOKEN} return OTHER; 72 {INT} return 72 {INT} return INT; 73 {REAL} return 73 {REAL} return REAL; 74 74 75 "..." return 75 "..." return DOTS; 76 76 77 /* All other tokens are single characters. * 77 /* All other tokens are single characters. */ 78 . return 78 . return yytext[0]; 79 79 80 80 81 %% 81 %% 82 82 83 /* Bring in the keyword recognizer. */ 83 /* Bring in the keyword recognizer. */ 84 84 85 #include "keywords.c" 85 #include "keywords.c" 86 86 87 87 88 /* Macros to append to our phrase collection l 88 /* Macros to append to our phrase collection list. */ 89 89 90 /* 90 /* 91 * We mark any token, that that equals to a kn 91 * We mark any token, that that equals to a known enumerator, as 92 * SYM_ENUM_CONST. The parser will change this 92 * SYM_ENUM_CONST. The parser will change this for struct and union tags later, 93 * the only problem is struct and union member 93 * the only problem is struct and union members: 94 * enum e { a, b }; struct s { int a, b; } 94 * enum e { a, b }; struct s { int a, b; } 95 * but in this case, the only effect will be, 95 * but in this case, the only effect will be, that the ABI checksums become 96 * more volatile, which is acceptable. Also, s 96 * more volatile, which is acceptable. Also, such collisions are quite rare, 97 * so far it was only observed in include/linu 97 * so far it was only observed in include/linux/telephony.h. 98 */ 98 */ 99 #define _APP(T,L) do { 99 #define _APP(T,L) do { \ 100 cur_node = next_node 100 cur_node = next_node; \ 101 next_node = xmalloc( 101 next_node = xmalloc(sizeof(*next_node)); \ 102 next_node->next = cu 102 next_node->next = cur_node; \ 103 cur_node->string = m 103 cur_node->string = memcpy(xmalloc(L+1), T, L+1); \ 104 cur_node->tag = 104 cur_node->tag = \ 105 find_symbol(cur_no 105 find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\ 106 SYM_ENUM_CONST : S 106 SYM_ENUM_CONST : SYM_NORMAL ; \ 107 cur_node->in_source_ 107 cur_node->in_source_file = in_source_file; \ 108 } while (0) 108 } while (0) 109 109 110 #define APP _APP(yytext, yyleng) 110 #define APP _APP(yytext, yyleng) 111 111 112 112 113 /* The second stage lexer. Here we incorporat 113 /* The second stage lexer. Here we incorporate knowledge of the state 114 of the parser to tailor the tokens that are 114 of the parser to tailor the tokens that are returned. */ 115 115 116 int 116 int 117 yylex(void) 117 yylex(void) 118 { 118 { 119 static enum { 119 static enum { 120 ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST 120 ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1, 121 ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_ST 121 ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT, >> 122 ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4, >> 123 ST_TABLE_5, ST_TABLE_6 122 } lexstate = ST_NOTSTARTED; 124 } lexstate = ST_NOTSTARTED; 123 125 124 static int suppress_type_lookup, dont_want_b 126 static int suppress_type_lookup, dont_want_brace_phrase; 125 static struct string_list *next_node; 127 static struct string_list *next_node; 126 static char *source_file; << 127 128 128 int token, count = 0; 129 int token, count = 0; 129 struct string_list *cur_node; 130 struct string_list *cur_node; 130 131 131 if (lexstate == ST_NOTSTARTED) 132 if (lexstate == ST_NOTSTARTED) 132 { 133 { 133 next_node = xmalloc(sizeof(*next_node)); 134 next_node = xmalloc(sizeof(*next_node)); 134 next_node->next = NULL; 135 next_node->next = NULL; 135 lexstate = ST_NORMAL; 136 lexstate = ST_NORMAL; 136 } 137 } 137 138 138 repeat: 139 repeat: 139 token = yylex1(); 140 token = yylex1(); 140 141 141 if (token == 0) 142 if (token == 0) 142 return 0; 143 return 0; 143 else if (token == FILENAME) 144 else if (token == FILENAME) 144 { 145 { 145 char *file, *e; 146 char *file, *e; 146 147 147 /* Save the filename and line number for 148 /* Save the filename and line number for later error messages. */ 148 149 149 if (cur_filename) 150 if (cur_filename) 150 free(cur_filename); 151 free(cur_filename); 151 152 152 file = strchr(yytext, '\"')+1; 153 file = strchr(yytext, '\"')+1; 153 e = strchr(file, '\"'); 154 e = strchr(file, '\"'); 154 *e = '\0'; 155 *e = '\0'; 155 cur_filename = memcpy(xmalloc(e-file+1), 156 cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1); 156 cur_line = atoi(yytext+2); 157 cur_line = atoi(yytext+2); 157 158 158 if (!source_file) { 159 if (!source_file) { 159 source_file = xstrdup(cur_filename); 160 source_file = xstrdup(cur_filename); 160 in_source_file = 1; 161 in_source_file = 1; 161 } else { 162 } else { 162 in_source_file = (strcmp(cur_filename, 163 in_source_file = (strcmp(cur_filename, source_file) == 0); 163 } 164 } 164 165 165 goto repeat; 166 goto repeat; 166 } 167 } 167 168 168 switch (lexstate) 169 switch (lexstate) 169 { 170 { 170 case ST_NORMAL: 171 case ST_NORMAL: 171 switch (token) 172 switch (token) 172 { 173 { 173 case IDENT: 174 case IDENT: 174 APP; 175 APP; 175 { 176 { 176 int r = is_reserved_word(yytext, y 177 int r = is_reserved_word(yytext, yyleng); 177 if (r >= 0) 178 if (r >= 0) 178 { 179 { 179 switch (token = r) 180 switch (token = r) 180 { 181 { 181 case ATTRIBUTE_KEYW: 182 case ATTRIBUTE_KEYW: 182 lexstate = ST_ATTRIBUTE; 183 lexstate = ST_ATTRIBUTE; 183 count = 0; 184 count = 0; 184 goto repeat; 185 goto repeat; 185 case ASM_KEYW: 186 case ASM_KEYW: 186 lexstate = ST_ASM; 187 lexstate = ST_ASM; 187 count = 0; 188 count = 0; 188 goto repeat; 189 goto repeat; 189 case TYPEOF_KEYW: 190 case TYPEOF_KEYW: 190 lexstate = ST_TYPEOF; 191 lexstate = ST_TYPEOF; 191 count = 0; 192 count = 0; 192 goto repeat; 193 goto repeat; 193 194 194 case STRUCT_KEYW: 195 case STRUCT_KEYW: 195 case UNION_KEYW: 196 case UNION_KEYW: 196 case ENUM_KEYW: 197 case ENUM_KEYW: 197 dont_want_brace_phrase = 3 198 dont_want_brace_phrase = 3; 198 suppress_type_lookup = 2; 199 suppress_type_lookup = 2; 199 goto fini; 200 goto fini; 200 201 201 case EXPORT_SYMBOL_KEYW: 202 case EXPORT_SYMBOL_KEYW: 202 goto fini; 203 goto fini; 203 204 204 case STATIC_ASSERT_KEYW: 205 case STATIC_ASSERT_KEYW: 205 lexstate = ST_STATIC_ASSER 206 lexstate = ST_STATIC_ASSERT; 206 count = 0; 207 count = 0; 207 goto repeat; 208 goto repeat; 208 } 209 } 209 } 210 } 210 if (!suppress_type_lookup) 211 if (!suppress_type_lookup) 211 { 212 { 212 if (find_symbol(yytext, SYM_TY 213 if (find_symbol(yytext, SYM_TYPEDEF, 1)) 213 token = TYPE; 214 token = TYPE; 214 } 215 } 215 } 216 } 216 break; 217 break; 217 218 218 case '[': 219 case '[': 219 APP; 220 APP; 220 lexstate = ST_BRACKET; 221 lexstate = ST_BRACKET; 221 count = 1; 222 count = 1; 222 goto repeat; 223 goto repeat; 223 224 224 case '{': 225 case '{': 225 APP; 226 APP; 226 if (dont_want_brace_phrase) 227 if (dont_want_brace_phrase) 227 break; 228 break; 228 lexstate = ST_BRACE; 229 lexstate = ST_BRACE; 229 count = 1; 230 count = 1; 230 goto repeat; 231 goto repeat; 231 232 232 case '=': case ':': 233 case '=': case ':': 233 APP; 234 APP; 234 lexstate = ST_EXPRESSION; 235 lexstate = ST_EXPRESSION; 235 break; 236 break; 236 237 >> 238 case DOTS: 237 default: 239 default: 238 APP; 240 APP; 239 break; 241 break; 240 } 242 } 241 break; 243 break; 242 244 243 case ST_ATTRIBUTE: 245 case ST_ATTRIBUTE: 244 APP; 246 APP; 245 switch (token) 247 switch (token) 246 { 248 { 247 case '(': 249 case '(': 248 ++count; 250 ++count; 249 goto repeat; 251 goto repeat; 250 case ')': 252 case ')': 251 if (--count == 0) 253 if (--count == 0) 252 { 254 { 253 lexstate = ST_NORMAL; 255 lexstate = ST_NORMAL; 254 token = ATTRIBUTE_PHRASE; 256 token = ATTRIBUTE_PHRASE; 255 break; 257 break; 256 } 258 } 257 goto repeat; 259 goto repeat; 258 default: 260 default: 259 goto repeat; 261 goto repeat; 260 } 262 } 261 break; 263 break; 262 264 263 case ST_ASM: 265 case ST_ASM: 264 APP; 266 APP; 265 switch (token) 267 switch (token) 266 { 268 { 267 case '(': 269 case '(': 268 ++count; 270 ++count; 269 goto repeat; 271 goto repeat; 270 case ')': 272 case ')': 271 if (--count == 0) 273 if (--count == 0) 272 { 274 { 273 lexstate = ST_NORMAL; 275 lexstate = ST_NORMAL; 274 token = ASM_PHRASE; 276 token = ASM_PHRASE; 275 break; 277 break; 276 } 278 } 277 goto repeat; 279 goto repeat; 278 default: 280 default: 279 goto repeat; 281 goto repeat; 280 } 282 } 281 break; 283 break; 282 284 283 case ST_TYPEOF_1: 285 case ST_TYPEOF_1: 284 if (token == IDENT) 286 if (token == IDENT) 285 { 287 { 286 if (is_reserved_word(yytext, yyleng) 288 if (is_reserved_word(yytext, yyleng) >= 0 287 || find_symbol(yytext, SYM_TYPED 289 || find_symbol(yytext, SYM_TYPEDEF, 1)) 288 { 290 { 289 yyless(0); 291 yyless(0); 290 unput('('); 292 unput('('); 291 lexstate = ST_NORMAL; 293 lexstate = ST_NORMAL; 292 token = TYPEOF_KEYW; 294 token = TYPEOF_KEYW; 293 break; 295 break; 294 } 296 } 295 _APP("(", 1); 297 _APP("(", 1); 296 } 298 } 297 lexstate = ST_TYPEOF; 299 lexstate = ST_TYPEOF; 298 /* FALLTHRU */ 300 /* FALLTHRU */ 299 301 300 case ST_TYPEOF: 302 case ST_TYPEOF: 301 switch (token) 303 switch (token) 302 { 304 { 303 case '(': 305 case '(': 304 if ( ++count == 1 ) 306 if ( ++count == 1 ) 305 lexstate = ST_TYPEOF_1; 307 lexstate = ST_TYPEOF_1; 306 else 308 else 307 APP; 309 APP; 308 goto repeat; 310 goto repeat; 309 case ')': 311 case ')': 310 APP; 312 APP; 311 if (--count == 0) 313 if (--count == 0) 312 { 314 { 313 lexstate = ST_NORMAL; 315 lexstate = ST_NORMAL; 314 token = TYPEOF_PHRASE; 316 token = TYPEOF_PHRASE; 315 break; 317 break; 316 } 318 } 317 goto repeat; 319 goto repeat; 318 default: 320 default: 319 APP; 321 APP; 320 goto repeat; 322 goto repeat; 321 } 323 } 322 break; 324 break; 323 325 324 case ST_BRACKET: 326 case ST_BRACKET: 325 APP; 327 APP; 326 switch (token) 328 switch (token) 327 { 329 { 328 case '[': 330 case '[': 329 ++count; 331 ++count; 330 goto repeat; 332 goto repeat; 331 case ']': 333 case ']': 332 if (--count == 0) 334 if (--count == 0) 333 { 335 { 334 lexstate = ST_NORMAL; 336 lexstate = ST_NORMAL; 335 token = BRACKET_PHRASE; 337 token = BRACKET_PHRASE; 336 break; 338 break; 337 } 339 } 338 goto repeat; 340 goto repeat; 339 default: 341 default: 340 goto repeat; 342 goto repeat; 341 } 343 } 342 break; 344 break; 343 345 344 case ST_BRACE: 346 case ST_BRACE: 345 APP; 347 APP; 346 switch (token) 348 switch (token) 347 { 349 { 348 case '{': 350 case '{': 349 ++count; 351 ++count; 350 goto repeat; 352 goto repeat; 351 case '}': 353 case '}': 352 if (--count == 0) 354 if (--count == 0) 353 { 355 { 354 lexstate = ST_NORMAL; 356 lexstate = ST_NORMAL; 355 token = BRACE_PHRASE; 357 token = BRACE_PHRASE; 356 break; 358 break; 357 } 359 } 358 goto repeat; 360 goto repeat; 359 default: 361 default: 360 goto repeat; 362 goto repeat; 361 } 363 } 362 break; 364 break; 363 365 364 case ST_EXPRESSION: 366 case ST_EXPRESSION: 365 switch (token) 367 switch (token) 366 { 368 { 367 case '(': case '[': case '{': 369 case '(': case '[': case '{': 368 ++count; 370 ++count; 369 APP; 371 APP; 370 goto repeat; 372 goto repeat; 371 case '}': 373 case '}': 372 /* is this the last line of an enum 374 /* is this the last line of an enum declaration? */ 373 if (count == 0) 375 if (count == 0) 374 { 376 { 375 /* Put back the token we just re 377 /* Put back the token we just read so's we can find it again 376 after registering the express 378 after registering the expression. */ 377 unput(token); 379 unput(token); 378 380 379 lexstate = ST_NORMAL; 381 lexstate = ST_NORMAL; 380 token = EXPRESSION_PHRASE; 382 token = EXPRESSION_PHRASE; 381 break; 383 break; 382 } 384 } 383 /* FALLTHRU */ 385 /* FALLTHRU */ 384 case ')': case ']': 386 case ')': case ']': 385 --count; 387 --count; 386 APP; 388 APP; 387 goto repeat; 389 goto repeat; 388 case ',': case ';': 390 case ',': case ';': 389 if (count == 0) 391 if (count == 0) 390 { 392 { 391 /* Put back the token we just re 393 /* Put back the token we just read so's we can find it again 392 after registering the express 394 after registering the expression. */ 393 unput(token); 395 unput(token); 394 396 395 lexstate = ST_NORMAL; 397 lexstate = ST_NORMAL; 396 token = EXPRESSION_PHRASE; 398 token = EXPRESSION_PHRASE; 397 break; 399 break; 398 } 400 } 399 APP; 401 APP; 400 goto repeat; 402 goto repeat; 401 default: 403 default: 402 APP; 404 APP; 403 goto repeat; 405 goto repeat; 404 } 406 } 405 break; 407 break; 406 408 407 case ST_STATIC_ASSERT: 409 case ST_STATIC_ASSERT: 408 APP; 410 APP; 409 switch (token) 411 switch (token) 410 { 412 { 411 case '(': 413 case '(': 412 ++count; 414 ++count; 413 goto repeat; 415 goto repeat; 414 case ')': 416 case ')': 415 if (--count == 0) 417 if (--count == 0) 416 { 418 { 417 lexstate = ST_NORMAL; 419 lexstate = ST_NORMAL; 418 token = STATIC_ASSERT_PHRASE; 420 token = STATIC_ASSERT_PHRASE; 419 break; 421 break; 420 } 422 } 421 goto repeat; 423 goto repeat; 422 default: 424 default: 423 goto repeat; 425 goto repeat; >> 426 } >> 427 break; >> 428 >> 429 case ST_TABLE_1: >> 430 goto repeat; >> 431 >> 432 case ST_TABLE_2: >> 433 if (token == IDENT && yyleng == 1 && yytext[0] == 'X') >> 434 { >> 435 token = EXPORT_SYMBOL_KEYW; >> 436 lexstate = ST_TABLE_5; >> 437 APP; >> 438 break; >> 439 } >> 440 lexstate = ST_TABLE_6; >> 441 /* FALLTHRU */ >> 442 >> 443 case ST_TABLE_6: >> 444 switch (token) >> 445 { >> 446 case '{': case '[': case '(': >> 447 ++count; >> 448 break; >> 449 case '}': case ']': case ')': >> 450 --count; >> 451 break; >> 452 case ',': >> 453 if (count == 0) >> 454 lexstate = ST_TABLE_2; >> 455 break; >> 456 }; >> 457 goto repeat; >> 458 >> 459 case ST_TABLE_3: >> 460 goto repeat; >> 461 >> 462 case ST_TABLE_4: >> 463 if (token == ';') >> 464 lexstate = ST_NORMAL; >> 465 goto repeat; >> 466 >> 467 case ST_TABLE_5: >> 468 switch (token) >> 469 { >> 470 case ',': >> 471 token = ';'; >> 472 lexstate = ST_TABLE_2; >> 473 APP; >> 474 break; >> 475 default: >> 476 APP; >> 477 break; 424 } 478 } 425 break; 479 break; 426 480 427 default: 481 default: 428 exit(1); 482 exit(1); 429 } 483 } 430 fini: 484 fini: 431 485 432 if (suppress_type_lookup > 0) 486 if (suppress_type_lookup > 0) 433 --suppress_type_lookup; 487 --suppress_type_lookup; 434 if (dont_want_brace_phrase > 0) 488 if (dont_want_brace_phrase > 0) 435 --dont_want_brace_phrase; 489 --dont_want_brace_phrase; 436 490 437 yylval = &next_node->next; 491 yylval = &next_node->next; 438 492 439 return token; 493 return token; 440 } 494 }
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.