1 /* SPDX-License-Identifier: GPL-2.0-or-later * 1 /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 /* 2 /* 3 * Lexical analysis for genksyms. 3 * Lexical analysis for genksyms. 4 * Copyright 1996, 1997 Linux International. 4 * Copyright 1996, 1997 Linux International. 5 * 5 * 6 * New implementation contributed by Richard H< 6 * New implementation contributed by Richard Henderson <rth@tamu.edu> 7 * Based on original work by Bjorn Ekwall <bj0r 7 * Based on original work by Bjorn Ekwall <bj0rn@blox.se> 8 * 8 * 9 * Taken from Linux modutils 2.4.22. 9 * Taken from Linux modutils 2.4.22. 10 */ 10 */ 11 11 12 %{ 12 %{ 13 13 14 #include <limits.h> 14 #include <limits.h> 15 #include <stdlib.h> 15 #include <stdlib.h> 16 #include <string.h> 16 #include <string.h> 17 #include <ctype.h> 17 #include <ctype.h> 18 18 19 #include "genksyms.h" 19 #include "genksyms.h" 20 #include "parse.tab.h" 20 #include "parse.tab.h" 21 21 22 /* We've got a two-level lexer here. We let f 22 /* We've got a two-level lexer here. We let flex do basic tokenization 23 and then we categorize those basic tokens i 23 and then we categorize those basic tokens in the second stage. */ 24 #define YY_DECL static int yylex1(void 24 #define YY_DECL static int yylex1(void) 25 25 26 %} 26 %} 27 27 28 IDENT [A-Za-z_\$][A-Za-z0-9_ 28 IDENT [A-Za-z_\$][A-Za-z0-9_\$]* 29 29 30 O_INT 0[0-7]* 30 O_INT 0[0-7]* 31 D_INT [1-9][0-9]* 31 D_INT [1-9][0-9]* 32 X_INT 0[Xx][0-9A-Fa-f]+ 32 X_INT 0[Xx][0-9A-Fa-f]+ 33 I_SUF [Uu]|[Ll]|[Uu][Ll]|[Ll 33 I_SUF [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu] 34 INT ({O_INT}|{D_INT}|{X_IN 34 INT ({O_INT}|{D_INT}|{X_INT}){I_SUF}? 35 35 36 FRAC ([0-9]*\.[0-9]+)|([0-9 36 FRAC ([0-9]*\.[0-9]+)|([0-9]+\.) 37 EXP [Ee][+-]?[0-9]+ 37 EXP [Ee][+-]?[0-9]+ 38 F_SUF [FfLl] 38 F_SUF [FfLl] 39 REAL ({FRAC}{EXP}?{F_SUF}?) 39 REAL ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?) 40 40 41 STRING L?\"([^\\\"]*\\.)*[^\\ 41 STRING L?\"([^\\\"]*\\.)*[^\\\"]*\" 42 CHAR L?\'([^\\\']*\\.)*[^\\ 42 CHAR L?\'([^\\\']*\\.)*[^\\\']*\' 43 43 44 MC_TOKEN ([~%^&*+=|<>/-]=)|(&&) 44 MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>) 45 45 46 /* We don't do multiple input files. */ 46 /* We don't do multiple input files. */ 47 %option noyywrap 47 %option noyywrap 48 48 49 %option noinput 49 %option noinput 50 50 51 %% 51 %% 52 52 53 53 54 /* Keep track of our location in the original 54 /* Keep track of our location in the original source files. */ 55 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return 55 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return FILENAME; 56 ^#.*\n cur_li 56 ^#.*\n cur_line++; 57 \n cur_li 57 \n cur_line++; 58 58 59 /* Ignore all other whitespace. */ 59 /* Ignore all other whitespace. */ 60 [ \t\f\v\r]+ ; 60 [ \t\f\v\r]+ ; 61 61 62 62 63 {STRING} return 63 {STRING} return STRING; 64 {CHAR} return 64 {CHAR} return CHAR; 65 {IDENT} return 65 {IDENT} return IDENT; 66 66 67 /* The Pedant requires that the other C multi 67 /* The Pedant requires that the other C multi-character tokens be 68 recognized as tokens. We don't actually u 68 recognized as tokens. We don't actually use them since we don't 69 parse expressions, but we do want whitespa 69 parse expressions, but we do want whitespace to be arranged 70 around them properly. */ 70 around them properly. */ 71 {MC_TOKEN} return 71 {MC_TOKEN} return OTHER; 72 {INT} return 72 {INT} return INT; 73 {REAL} return 73 {REAL} return REAL; 74 74 75 "..." return 75 "..." return DOTS; 76 76 77 /* All other tokens are single characters. * 77 /* All other tokens are single characters. */ 78 . return 78 . return yytext[0]; 79 79 80 80 81 %% 81 %% 82 82 83 /* Bring in the keyword recognizer. */ 83 /* Bring in the keyword recognizer. */ 84 84 85 #include "keywords.c" 85 #include "keywords.c" 86 86 87 87 88 /* Macros to append to our phrase collection l 88 /* Macros to append to our phrase collection list. */ 89 89 90 /* 90 /* 91 * We mark any token, that that equals to a kn 91 * We mark any token, that that equals to a known enumerator, as 92 * SYM_ENUM_CONST. The parser will change this 92 * SYM_ENUM_CONST. The parser will change this for struct and union tags later, 93 * the only problem is struct and union member 93 * the only problem is struct and union members: 94 * enum e { a, b }; struct s { int a, b; } 94 * enum e { a, b }; struct s { int a, b; } 95 * but in this case, the only effect will be, 95 * but in this case, the only effect will be, that the ABI checksums become 96 * more volatile, which is acceptable. Also, s 96 * more volatile, which is acceptable. Also, such collisions are quite rare, 97 * so far it was only observed in include/linu 97 * so far it was only observed in include/linux/telephony.h. 98 */ 98 */ 99 #define _APP(T,L) do { 99 #define _APP(T,L) do { \ 100 cur_node = next_node 100 cur_node = next_node; \ 101 next_node = xmalloc( 101 next_node = xmalloc(sizeof(*next_node)); \ 102 next_node->next = cu 102 next_node->next = cur_node; \ 103 cur_node->string = m 103 cur_node->string = memcpy(xmalloc(L+1), T, L+1); \ 104 cur_node->tag = 104 cur_node->tag = \ 105 find_symbol(cur_no 105 find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\ 106 SYM_ENUM_CONST : S 106 SYM_ENUM_CONST : SYM_NORMAL ; \ 107 cur_node->in_source_ 107 cur_node->in_source_file = in_source_file; \ 108 } while (0) 108 } while (0) 109 109 110 #define APP _APP(yytext, yyleng) 110 #define APP _APP(yytext, yyleng) 111 111 112 112 113 /* The second stage lexer. Here we incorporat 113 /* The second stage lexer. Here we incorporate knowledge of the state 114 of the parser to tailor the tokens that are 114 of the parser to tailor the tokens that are returned. */ 115 115 116 int 116 int 117 yylex(void) 117 yylex(void) 118 { 118 { 119 static enum { 119 static enum { 120 ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST 120 ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1, 121 ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_ST !! 121 ST_BRACKET, ST_BRACE, ST_EXPRESSION, >> 122 ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4, >> 123 ST_TABLE_5, ST_TABLE_6 122 } lexstate = ST_NOTSTARTED; 124 } lexstate = ST_NOTSTARTED; 123 125 124 static int suppress_type_lookup, dont_want_b 126 static int suppress_type_lookup, dont_want_brace_phrase; 125 static struct string_list *next_node; 127 static struct string_list *next_node; 126 static char *source_file; << 127 128 128 int token, count = 0; 129 int token, count = 0; 129 struct string_list *cur_node; 130 struct string_list *cur_node; 130 131 131 if (lexstate == ST_NOTSTARTED) 132 if (lexstate == ST_NOTSTARTED) 132 { 133 { 133 next_node = xmalloc(sizeof(*next_node)); 134 next_node = xmalloc(sizeof(*next_node)); 134 next_node->next = NULL; 135 next_node->next = NULL; 135 lexstate = ST_NORMAL; 136 lexstate = ST_NORMAL; 136 } 137 } 137 138 138 repeat: 139 repeat: 139 token = yylex1(); 140 token = yylex1(); 140 141 141 if (token == 0) 142 if (token == 0) 142 return 0; 143 return 0; 143 else if (token == FILENAME) 144 else if (token == FILENAME) 144 { 145 { 145 char *file, *e; 146 char *file, *e; 146 147 147 /* Save the filename and line number for 148 /* Save the filename and line number for later error messages. */ 148 149 149 if (cur_filename) 150 if (cur_filename) 150 free(cur_filename); 151 free(cur_filename); 151 152 152 file = strchr(yytext, '\"')+1; 153 file = strchr(yytext, '\"')+1; 153 e = strchr(file, '\"'); 154 e = strchr(file, '\"'); 154 *e = '\0'; 155 *e = '\0'; 155 cur_filename = memcpy(xmalloc(e-file+1), 156 cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1); 156 cur_line = atoi(yytext+2); 157 cur_line = atoi(yytext+2); 157 158 158 if (!source_file) { 159 if (!source_file) { 159 source_file = xstrdup(cur_filename); 160 source_file = xstrdup(cur_filename); 160 in_source_file = 1; 161 in_source_file = 1; 161 } else { 162 } else { 162 in_source_file = (strcmp(cur_filename, 163 in_source_file = (strcmp(cur_filename, source_file) == 0); 163 } 164 } 164 165 165 goto repeat; 166 goto repeat; 166 } 167 } 167 168 168 switch (lexstate) 169 switch (lexstate) 169 { 170 { 170 case ST_NORMAL: 171 case ST_NORMAL: 171 switch (token) 172 switch (token) 172 { 173 { 173 case IDENT: 174 case IDENT: 174 APP; 175 APP; 175 { 176 { 176 int r = is_reserved_word(yytext, y 177 int r = is_reserved_word(yytext, yyleng); 177 if (r >= 0) 178 if (r >= 0) 178 { 179 { 179 switch (token = r) 180 switch (token = r) 180 { 181 { 181 case ATTRIBUTE_KEYW: 182 case ATTRIBUTE_KEYW: 182 lexstate = ST_ATTRIBUTE; 183 lexstate = ST_ATTRIBUTE; 183 count = 0; 184 count = 0; 184 goto repeat; 185 goto repeat; 185 case ASM_KEYW: 186 case ASM_KEYW: 186 lexstate = ST_ASM; 187 lexstate = ST_ASM; 187 count = 0; 188 count = 0; 188 goto repeat; 189 goto repeat; 189 case TYPEOF_KEYW: 190 case TYPEOF_KEYW: 190 lexstate = ST_TYPEOF; 191 lexstate = ST_TYPEOF; 191 count = 0; 192 count = 0; 192 goto repeat; 193 goto repeat; 193 194 194 case STRUCT_KEYW: 195 case STRUCT_KEYW: 195 case UNION_KEYW: 196 case UNION_KEYW: 196 case ENUM_KEYW: 197 case ENUM_KEYW: 197 dont_want_brace_phrase = 3 198 dont_want_brace_phrase = 3; 198 suppress_type_lookup = 2; 199 suppress_type_lookup = 2; 199 goto fini; 200 goto fini; 200 201 201 case EXPORT_SYMBOL_KEYW: 202 case EXPORT_SYMBOL_KEYW: 202 goto fini; 203 goto fini; 203 << 204 case STATIC_ASSERT_KEYW: << 205 lexstate = ST_STATIC_ASSER << 206 count = 0; << 207 goto repeat; << 208 } 204 } 209 } 205 } 210 if (!suppress_type_lookup) 206 if (!suppress_type_lookup) 211 { 207 { 212 if (find_symbol(yytext, SYM_TY 208 if (find_symbol(yytext, SYM_TYPEDEF, 1)) 213 token = TYPE; 209 token = TYPE; 214 } 210 } 215 } 211 } 216 break; 212 break; 217 213 218 case '[': 214 case '[': 219 APP; 215 APP; 220 lexstate = ST_BRACKET; 216 lexstate = ST_BRACKET; 221 count = 1; 217 count = 1; 222 goto repeat; 218 goto repeat; 223 219 224 case '{': 220 case '{': 225 APP; 221 APP; 226 if (dont_want_brace_phrase) 222 if (dont_want_brace_phrase) 227 break; 223 break; 228 lexstate = ST_BRACE; 224 lexstate = ST_BRACE; 229 count = 1; 225 count = 1; 230 goto repeat; 226 goto repeat; 231 227 232 case '=': case ':': 228 case '=': case ':': 233 APP; 229 APP; 234 lexstate = ST_EXPRESSION; 230 lexstate = ST_EXPRESSION; 235 break; 231 break; 236 232 >> 233 case DOTS: 237 default: 234 default: 238 APP; 235 APP; 239 break; 236 break; 240 } 237 } 241 break; 238 break; 242 239 243 case ST_ATTRIBUTE: 240 case ST_ATTRIBUTE: 244 APP; 241 APP; 245 switch (token) 242 switch (token) 246 { 243 { 247 case '(': 244 case '(': 248 ++count; 245 ++count; 249 goto repeat; 246 goto repeat; 250 case ')': 247 case ')': 251 if (--count == 0) 248 if (--count == 0) 252 { 249 { 253 lexstate = ST_NORMAL; 250 lexstate = ST_NORMAL; 254 token = ATTRIBUTE_PHRASE; 251 token = ATTRIBUTE_PHRASE; 255 break; 252 break; 256 } 253 } 257 goto repeat; 254 goto repeat; 258 default: 255 default: 259 goto repeat; 256 goto repeat; 260 } 257 } 261 break; 258 break; 262 259 263 case ST_ASM: 260 case ST_ASM: 264 APP; 261 APP; 265 switch (token) 262 switch (token) 266 { 263 { 267 case '(': 264 case '(': 268 ++count; 265 ++count; 269 goto repeat; 266 goto repeat; 270 case ')': 267 case ')': 271 if (--count == 0) 268 if (--count == 0) 272 { 269 { 273 lexstate = ST_NORMAL; 270 lexstate = ST_NORMAL; 274 token = ASM_PHRASE; 271 token = ASM_PHRASE; 275 break; 272 break; 276 } 273 } 277 goto repeat; 274 goto repeat; 278 default: 275 default: 279 goto repeat; 276 goto repeat; 280 } 277 } 281 break; 278 break; 282 279 283 case ST_TYPEOF_1: 280 case ST_TYPEOF_1: 284 if (token == IDENT) 281 if (token == IDENT) 285 { 282 { 286 if (is_reserved_word(yytext, yyleng) 283 if (is_reserved_word(yytext, yyleng) >= 0 287 || find_symbol(yytext, SYM_TYPED 284 || find_symbol(yytext, SYM_TYPEDEF, 1)) 288 { 285 { 289 yyless(0); 286 yyless(0); 290 unput('('); 287 unput('('); 291 lexstate = ST_NORMAL; 288 lexstate = ST_NORMAL; 292 token = TYPEOF_KEYW; 289 token = TYPEOF_KEYW; 293 break; 290 break; 294 } 291 } 295 _APP("(", 1); 292 _APP("(", 1); 296 } 293 } 297 lexstate = ST_TYPEOF; 294 lexstate = ST_TYPEOF; 298 /* FALLTHRU */ 295 /* FALLTHRU */ 299 296 300 case ST_TYPEOF: 297 case ST_TYPEOF: 301 switch (token) 298 switch (token) 302 { 299 { 303 case '(': 300 case '(': 304 if ( ++count == 1 ) 301 if ( ++count == 1 ) 305 lexstate = ST_TYPEOF_1; 302 lexstate = ST_TYPEOF_1; 306 else 303 else 307 APP; 304 APP; 308 goto repeat; 305 goto repeat; 309 case ')': 306 case ')': 310 APP; 307 APP; 311 if (--count == 0) 308 if (--count == 0) 312 { 309 { 313 lexstate = ST_NORMAL; 310 lexstate = ST_NORMAL; 314 token = TYPEOF_PHRASE; 311 token = TYPEOF_PHRASE; 315 break; 312 break; 316 } 313 } 317 goto repeat; 314 goto repeat; 318 default: 315 default: 319 APP; 316 APP; 320 goto repeat; 317 goto repeat; 321 } 318 } 322 break; 319 break; 323 320 324 case ST_BRACKET: 321 case ST_BRACKET: 325 APP; 322 APP; 326 switch (token) 323 switch (token) 327 { 324 { 328 case '[': 325 case '[': 329 ++count; 326 ++count; 330 goto repeat; 327 goto repeat; 331 case ']': 328 case ']': 332 if (--count == 0) 329 if (--count == 0) 333 { 330 { 334 lexstate = ST_NORMAL; 331 lexstate = ST_NORMAL; 335 token = BRACKET_PHRASE; 332 token = BRACKET_PHRASE; 336 break; 333 break; 337 } 334 } 338 goto repeat; 335 goto repeat; 339 default: 336 default: 340 goto repeat; 337 goto repeat; 341 } 338 } 342 break; 339 break; 343 340 344 case ST_BRACE: 341 case ST_BRACE: 345 APP; 342 APP; 346 switch (token) 343 switch (token) 347 { 344 { 348 case '{': 345 case '{': 349 ++count; 346 ++count; 350 goto repeat; 347 goto repeat; 351 case '}': 348 case '}': 352 if (--count == 0) 349 if (--count == 0) 353 { 350 { 354 lexstate = ST_NORMAL; 351 lexstate = ST_NORMAL; 355 token = BRACE_PHRASE; 352 token = BRACE_PHRASE; 356 break; 353 break; 357 } 354 } 358 goto repeat; 355 goto repeat; 359 default: 356 default: 360 goto repeat; 357 goto repeat; 361 } 358 } 362 break; 359 break; 363 360 364 case ST_EXPRESSION: 361 case ST_EXPRESSION: 365 switch (token) 362 switch (token) 366 { 363 { 367 case '(': case '[': case '{': 364 case '(': case '[': case '{': 368 ++count; 365 ++count; 369 APP; 366 APP; 370 goto repeat; 367 goto repeat; 371 case '}': 368 case '}': 372 /* is this the last line of an enum 369 /* is this the last line of an enum declaration? */ 373 if (count == 0) 370 if (count == 0) 374 { 371 { 375 /* Put back the token we just re 372 /* Put back the token we just read so's we can find it again 376 after registering the express 373 after registering the expression. */ 377 unput(token); 374 unput(token); 378 375 379 lexstate = ST_NORMAL; 376 lexstate = ST_NORMAL; 380 token = EXPRESSION_PHRASE; 377 token = EXPRESSION_PHRASE; 381 break; 378 break; 382 } 379 } 383 /* FALLTHRU */ 380 /* FALLTHRU */ 384 case ')': case ']': 381 case ')': case ']': 385 --count; 382 --count; 386 APP; 383 APP; 387 goto repeat; 384 goto repeat; 388 case ',': case ';': 385 case ',': case ';': 389 if (count == 0) 386 if (count == 0) 390 { 387 { 391 /* Put back the token we just re 388 /* Put back the token we just read so's we can find it again 392 after registering the express 389 after registering the expression. */ 393 unput(token); 390 unput(token); 394 391 395 lexstate = ST_NORMAL; 392 lexstate = ST_NORMAL; 396 token = EXPRESSION_PHRASE; 393 token = EXPRESSION_PHRASE; 397 break; 394 break; 398 } 395 } 399 APP; 396 APP; 400 goto repeat; 397 goto repeat; 401 default: 398 default: 402 APP; 399 APP; 403 goto repeat; 400 goto repeat; 404 } 401 } 405 break; 402 break; 406 403 407 case ST_STATIC_ASSERT: !! 404 case ST_TABLE_1: 408 APP; !! 405 goto repeat; >> 406 >> 407 case ST_TABLE_2: >> 408 if (token == IDENT && yyleng == 1 && yytext[0] == 'X') >> 409 { >> 410 token = EXPORT_SYMBOL_KEYW; >> 411 lexstate = ST_TABLE_5; >> 412 APP; >> 413 break; >> 414 } >> 415 lexstate = ST_TABLE_6; >> 416 /* FALLTHRU */ >> 417 >> 418 case ST_TABLE_6: 409 switch (token) 419 switch (token) 410 { 420 { 411 case '(': !! 421 case '{': case '[': case '(': 412 ++count; 422 ++count; 413 goto repeat; !! 423 break; 414 case ')': !! 424 case '}': case ']': case ')': 415 if (--count == 0) !! 425 --count; 416 { !! 426 break; 417 lexstate = ST_NORMAL; !! 427 case ',': 418 token = STATIC_ASSERT_PHRASE; !! 428 if (count == 0) 419 break; !! 429 lexstate = ST_TABLE_2; 420 } !! 430 break; 421 goto repeat; !! 431 }; >> 432 goto repeat; >> 433 >> 434 case ST_TABLE_3: >> 435 goto repeat; >> 436 >> 437 case ST_TABLE_4: >> 438 if (token == ';') >> 439 lexstate = ST_NORMAL; >> 440 goto repeat; >> 441 >> 442 case ST_TABLE_5: >> 443 switch (token) >> 444 { >> 445 case ',': >> 446 token = ';'; >> 447 lexstate = ST_TABLE_2; >> 448 APP; >> 449 break; 422 default: 450 default: 423 goto repeat; !! 451 APP; >> 452 break; 424 } 453 } 425 break; 454 break; 426 455 427 default: 456 default: 428 exit(1); 457 exit(1); 429 } 458 } 430 fini: 459 fini: 431 460 432 if (suppress_type_lookup > 0) 461 if (suppress_type_lookup > 0) 433 --suppress_type_lookup; 462 --suppress_type_lookup; 434 if (dont_want_brace_phrase > 0) 463 if (dont_want_brace_phrase > 0) 435 --dont_want_brace_phrase; 464 --dont_want_brace_phrase; 436 465 437 yylval = &next_node->next; 466 yylval = &next_node->next; 438 467 439 return token; 468 return token; 440 } 469 }
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.