1 /* SPDX-License-Identifier: GPL-2.0-or-later * !! 1 /* Lexical analysis for genksyms. 2 /* !! 2 Copyright 1996, 1997 Linux International. 3 * Lexical analysis for genksyms. !! 3 4 * Copyright 1996, 1997 Linux International. !! 4 New implementation contributed by Richard Henderson <rth@tamu.edu> 5 * !! 5 Based on original work by Bjorn Ekwall <bj0rn@blox.se> 6 * New implementation contributed by Richard H< !! 6 7 * Based on original work by Bjorn Ekwall <bj0r !! 7 Taken from Linux modutils 2.4.22. 8 * !! 8 9 * Taken from Linux modutils 2.4.22. !! 9 This program is free software; you can redistribute it and/or modify it 10 */ !! 10 under the terms of the GNU General Public License as published by the >> 11 Free Software Foundation; either version 2 of the License, or (at your >> 12 option) any later version. >> 13 >> 14 This program is distributed in the hope that it will be useful, but >> 15 WITHOUT ANY WARRANTY; without even the implied warranty of >> 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> 17 General Public License for more details. >> 18 >> 19 You should have received a copy of the GNU General Public License >> 20 along with this program; if not, write to the Free Software Foundation, >> 21 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ >> 22 11 23 12 %{ 24 %{ 13 25 14 #include <limits.h> 26 #include <limits.h> 15 #include <stdlib.h> 27 #include <stdlib.h> 16 #include <string.h> 28 #include <string.h> 17 #include <ctype.h> 29 #include <ctype.h> 18 30 19 #include "genksyms.h" 31 #include "genksyms.h" 20 #include "parse.tab.h" 32 #include "parse.tab.h" 21 33 22 /* We've got a two-level lexer here. We let f 34 /* We've got a two-level lexer here. We let flex do basic tokenization 23 and then we categorize those basic tokens i 35 and then we categorize those basic tokens in the second stage. */ 24 #define YY_DECL static int yylex1(void 36 #define YY_DECL static int yylex1(void) 25 37 26 %} 38 %} 27 39 28 IDENT [A-Za-z_\$][A-Za-z0-9_ 40 IDENT [A-Za-z_\$][A-Za-z0-9_\$]* 29 41 30 O_INT 0[0-7]* 42 O_INT 0[0-7]* 31 D_INT [1-9][0-9]* 43 D_INT [1-9][0-9]* 32 X_INT 0[Xx][0-9A-Fa-f]+ 44 X_INT 0[Xx][0-9A-Fa-f]+ 33 I_SUF [Uu]|[Ll]|[Uu][Ll]|[Ll 45 I_SUF [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu] 34 INT ({O_INT}|{D_INT}|{X_IN 46 INT ({O_INT}|{D_INT}|{X_INT}){I_SUF}? 35 47 36 FRAC ([0-9]*\.[0-9]+)|([0-9 48 FRAC ([0-9]*\.[0-9]+)|([0-9]+\.) 37 EXP [Ee][+-]?[0-9]+ 49 EXP [Ee][+-]?[0-9]+ 38 F_SUF [FfLl] 50 F_SUF [FfLl] 39 REAL ({FRAC}{EXP}?{F_SUF}?) 51 REAL ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?) 40 52 41 STRING L?\"([^\\\"]*\\.)*[^\\ 53 STRING L?\"([^\\\"]*\\.)*[^\\\"]*\" 42 CHAR L?\'([^\\\']*\\.)*[^\\ 54 CHAR L?\'([^\\\']*\\.)*[^\\\']*\' 43 55 44 MC_TOKEN ([~%^&*+=|<>/-]=)|(&&) 56 MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>) 45 57 46 /* We don't do multiple input files. */ 58 /* We don't do multiple input files. */ 47 %option noyywrap 59 %option noyywrap 48 60 49 %option noinput 61 %option noinput 50 62 51 %% 63 %% 52 64 53 65 54 /* Keep track of our location in the original 66 /* Keep track of our location in the original source files. */ 55 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return 67 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return FILENAME; 56 ^#.*\n cur_li 68 ^#.*\n cur_line++; 57 \n cur_li 69 \n cur_line++; 58 70 59 /* Ignore all other whitespace. */ 71 /* Ignore all other whitespace. */ 60 [ \t\f\v\r]+ ; 72 [ \t\f\v\r]+ ; 61 73 62 74 63 {STRING} return 75 {STRING} return STRING; 64 {CHAR} return 76 {CHAR} return CHAR; 65 {IDENT} return 77 {IDENT} return IDENT; 66 78 67 /* The Pedant requires that the other C multi 79 /* The Pedant requires that the other C multi-character tokens be 68 recognized as tokens. We don't actually u 80 recognized as tokens. We don't actually use them since we don't 69 parse expressions, but we do want whitespa 81 parse expressions, but we do want whitespace to be arranged 70 around them properly. */ 82 around them properly. */ 71 {MC_TOKEN} return 83 {MC_TOKEN} return OTHER; 72 {INT} return 84 {INT} return INT; 73 {REAL} return 85 {REAL} return REAL; 74 86 75 "..." return 87 "..." return DOTS; 76 88 77 /* All other tokens are single characters. * 89 /* All other tokens are single characters. */ 78 . return 90 . return yytext[0]; 79 91 80 92 81 %% 93 %% 82 94 83 /* Bring in the keyword recognizer. */ 95 /* Bring in the keyword recognizer. */ 84 96 85 #include "keywords.c" !! 97 #include "keywords.hash.c" 86 98 87 99 88 /* Macros to append to our phrase collection l 100 /* Macros to append to our phrase collection list. */ 89 101 90 /* 102 /* 91 * We mark any token, that that equals to a kn 103 * We mark any token, that that equals to a known enumerator, as 92 * SYM_ENUM_CONST. The parser will change this 104 * SYM_ENUM_CONST. The parser will change this for struct and union tags later, 93 * the only problem is struct and union member 105 * the only problem is struct and union members: 94 * enum e { a, b }; struct s { int a, b; } 106 * enum e { a, b }; struct s { int a, b; } 95 * but in this case, the only effect will be, 107 * but in this case, the only effect will be, that the ABI checksums become 96 * more volatile, which is acceptable. Also, s 108 * more volatile, which is acceptable. Also, such collisions are quite rare, 97 * so far it was only observed in include/linu 109 * so far it was only observed in include/linux/telephony.h. 98 */ 110 */ 99 #define _APP(T,L) do { 111 #define _APP(T,L) do { \ 100 cur_node = next_node 112 cur_node = next_node; \ 101 next_node = xmalloc( 113 next_node = xmalloc(sizeof(*next_node)); \ 102 next_node->next = cu 114 next_node->next = cur_node; \ 103 cur_node->string = m 115 cur_node->string = memcpy(xmalloc(L+1), T, L+1); \ 104 cur_node->tag = 116 cur_node->tag = \ 105 find_symbol(cur_no 117 find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\ 106 SYM_ENUM_CONST : S 118 SYM_ENUM_CONST : SYM_NORMAL ; \ 107 cur_node->in_source_ 119 cur_node->in_source_file = in_source_file; \ 108 } while (0) 120 } while (0) 109 121 110 #define APP _APP(yytext, yyleng) 122 #define APP _APP(yytext, yyleng) 111 123 112 124 113 /* The second stage lexer. Here we incorporat 125 /* The second stage lexer. Here we incorporate knowledge of the state 114 of the parser to tailor the tokens that are 126 of the parser to tailor the tokens that are returned. */ 115 127 116 int 128 int 117 yylex(void) 129 yylex(void) 118 { 130 { 119 static enum { 131 static enum { 120 ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST 132 ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1, 121 ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_ST !! 133 ST_BRACKET, ST_BRACE, ST_EXPRESSION, >> 134 ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4, >> 135 ST_TABLE_5, ST_TABLE_6 122 } lexstate = ST_NOTSTARTED; 136 } lexstate = ST_NOTSTARTED; 123 137 124 static int suppress_type_lookup, dont_want_b 138 static int suppress_type_lookup, dont_want_brace_phrase; 125 static struct string_list *next_node; 139 static struct string_list *next_node; 126 static char *source_file; << 127 140 128 int token, count = 0; 141 int token, count = 0; 129 struct string_list *cur_node; 142 struct string_list *cur_node; 130 143 131 if (lexstate == ST_NOTSTARTED) 144 if (lexstate == ST_NOTSTARTED) 132 { 145 { 133 next_node = xmalloc(sizeof(*next_node)); 146 next_node = xmalloc(sizeof(*next_node)); 134 next_node->next = NULL; 147 next_node->next = NULL; 135 lexstate = ST_NORMAL; 148 lexstate = ST_NORMAL; 136 } 149 } 137 150 138 repeat: 151 repeat: 139 token = yylex1(); 152 token = yylex1(); 140 153 141 if (token == 0) 154 if (token == 0) 142 return 0; 155 return 0; 143 else if (token == FILENAME) 156 else if (token == FILENAME) 144 { 157 { 145 char *file, *e; 158 char *file, *e; 146 159 147 /* Save the filename and line number for 160 /* Save the filename and line number for later error messages. */ 148 161 149 if (cur_filename) 162 if (cur_filename) 150 free(cur_filename); 163 free(cur_filename); 151 164 152 file = strchr(yytext, '\"')+1; 165 file = strchr(yytext, '\"')+1; 153 e = strchr(file, '\"'); 166 e = strchr(file, '\"'); 154 *e = '\0'; 167 *e = '\0'; 155 cur_filename = memcpy(xmalloc(e-file+1), 168 cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1); 156 cur_line = atoi(yytext+2); 169 cur_line = atoi(yytext+2); 157 170 158 if (!source_file) { 171 if (!source_file) { 159 source_file = xstrdup(cur_filename); 172 source_file = xstrdup(cur_filename); 160 in_source_file = 1; 173 in_source_file = 1; 161 } else { 174 } else { 162 in_source_file = (strcmp(cur_filename, 175 in_source_file = (strcmp(cur_filename, source_file) == 0); 163 } 176 } 164 177 165 goto repeat; 178 goto repeat; 166 } 179 } 167 180 168 switch (lexstate) 181 switch (lexstate) 169 { 182 { 170 case ST_NORMAL: 183 case ST_NORMAL: 171 switch (token) 184 switch (token) 172 { 185 { 173 case IDENT: 186 case IDENT: 174 APP; 187 APP; 175 { 188 { 176 int r = is_reserved_word(yytext, y !! 189 const struct resword *r = is_reserved_word(yytext, yyleng); 177 if (r >= 0) !! 190 if (r) 178 { 191 { 179 switch (token = r) !! 192 switch (token = r->token) 180 { 193 { 181 case ATTRIBUTE_KEYW: 194 case ATTRIBUTE_KEYW: 182 lexstate = ST_ATTRIBUTE; 195 lexstate = ST_ATTRIBUTE; 183 count = 0; 196 count = 0; 184 goto repeat; 197 goto repeat; 185 case ASM_KEYW: 198 case ASM_KEYW: 186 lexstate = ST_ASM; 199 lexstate = ST_ASM; 187 count = 0; 200 count = 0; 188 goto repeat; 201 goto repeat; 189 case TYPEOF_KEYW: 202 case TYPEOF_KEYW: 190 lexstate = ST_TYPEOF; 203 lexstate = ST_TYPEOF; 191 count = 0; 204 count = 0; 192 goto repeat; 205 goto repeat; 193 206 194 case STRUCT_KEYW: 207 case STRUCT_KEYW: 195 case UNION_KEYW: 208 case UNION_KEYW: 196 case ENUM_KEYW: 209 case ENUM_KEYW: 197 dont_want_brace_phrase = 3 210 dont_want_brace_phrase = 3; 198 suppress_type_lookup = 2; 211 suppress_type_lookup = 2; 199 goto fini; 212 goto fini; 200 213 201 case EXPORT_SYMBOL_KEYW: 214 case EXPORT_SYMBOL_KEYW: 202 goto fini; 215 goto fini; 203 << 204 case STATIC_ASSERT_KEYW: << 205 lexstate = ST_STATIC_ASSER << 206 count = 0; << 207 goto repeat; << 208 } 216 } 209 } 217 } 210 if (!suppress_type_lookup) 218 if (!suppress_type_lookup) 211 { 219 { 212 if (find_symbol(yytext, SYM_TY 220 if (find_symbol(yytext, SYM_TYPEDEF, 1)) 213 token = TYPE; 221 token = TYPE; 214 } 222 } 215 } 223 } 216 break; 224 break; 217 225 218 case '[': 226 case '[': 219 APP; 227 APP; 220 lexstate = ST_BRACKET; 228 lexstate = ST_BRACKET; 221 count = 1; 229 count = 1; 222 goto repeat; 230 goto repeat; 223 231 224 case '{': 232 case '{': 225 APP; 233 APP; 226 if (dont_want_brace_phrase) 234 if (dont_want_brace_phrase) 227 break; 235 break; 228 lexstate = ST_BRACE; 236 lexstate = ST_BRACE; 229 count = 1; 237 count = 1; 230 goto repeat; 238 goto repeat; 231 239 232 case '=': case ':': 240 case '=': case ':': 233 APP; 241 APP; 234 lexstate = ST_EXPRESSION; 242 lexstate = ST_EXPRESSION; 235 break; 243 break; 236 244 >> 245 case DOTS: 237 default: 246 default: 238 APP; 247 APP; 239 break; 248 break; 240 } 249 } 241 break; 250 break; 242 251 243 case ST_ATTRIBUTE: 252 case ST_ATTRIBUTE: 244 APP; 253 APP; 245 switch (token) 254 switch (token) 246 { 255 { 247 case '(': 256 case '(': 248 ++count; 257 ++count; 249 goto repeat; 258 goto repeat; 250 case ')': 259 case ')': 251 if (--count == 0) 260 if (--count == 0) 252 { 261 { 253 lexstate = ST_NORMAL; 262 lexstate = ST_NORMAL; 254 token = ATTRIBUTE_PHRASE; 263 token = ATTRIBUTE_PHRASE; 255 break; 264 break; 256 } 265 } 257 goto repeat; 266 goto repeat; 258 default: 267 default: 259 goto repeat; 268 goto repeat; 260 } 269 } 261 break; 270 break; 262 271 263 case ST_ASM: 272 case ST_ASM: 264 APP; 273 APP; 265 switch (token) 274 switch (token) 266 { 275 { 267 case '(': 276 case '(': 268 ++count; 277 ++count; 269 goto repeat; 278 goto repeat; 270 case ')': 279 case ')': 271 if (--count == 0) 280 if (--count == 0) 272 { 281 { 273 lexstate = ST_NORMAL; 282 lexstate = ST_NORMAL; 274 token = ASM_PHRASE; 283 token = ASM_PHRASE; 275 break; 284 break; 276 } 285 } 277 goto repeat; 286 goto repeat; 278 default: 287 default: 279 goto repeat; 288 goto repeat; 280 } 289 } 281 break; 290 break; 282 291 283 case ST_TYPEOF_1: << 284 if (token == IDENT) << 285 { << 286 if (is_reserved_word(yytext, yyleng) << 287 || find_symbol(yytext, SYM_TYPED << 288 { << 289 yyless(0); << 290 unput('('); << 291 lexstate = ST_NORMAL; << 292 token = TYPEOF_KEYW; << 293 break; << 294 } << 295 _APP("(", 1); << 296 } << 297 lexstate = ST_TYPEOF; << 298 /* FALLTHRU */ << 299 << 300 case ST_TYPEOF: 292 case ST_TYPEOF: 301 switch (token) 293 switch (token) 302 { 294 { 303 case '(': 295 case '(': 304 if ( ++count == 1 ) 296 if ( ++count == 1 ) 305 lexstate = ST_TYPEOF_1; 297 lexstate = ST_TYPEOF_1; 306 else 298 else 307 APP; 299 APP; 308 goto repeat; 300 goto repeat; 309 case ')': 301 case ')': 310 APP; 302 APP; 311 if (--count == 0) 303 if (--count == 0) 312 { 304 { 313 lexstate = ST_NORMAL; 305 lexstate = ST_NORMAL; 314 token = TYPEOF_PHRASE; 306 token = TYPEOF_PHRASE; 315 break; 307 break; 316 } 308 } 317 goto repeat; 309 goto repeat; 318 default: 310 default: 319 APP; 311 APP; 320 goto repeat; 312 goto repeat; 321 } 313 } 322 break; 314 break; 323 315 >> 316 case ST_TYPEOF_1: >> 317 if (token == IDENT) >> 318 { >> 319 if (is_reserved_word(yytext, yyleng) >> 320 || find_symbol(yytext, SYM_TYPEDEF, 1)) >> 321 { >> 322 yyless(0); >> 323 unput('('); >> 324 lexstate = ST_NORMAL; >> 325 token = TYPEOF_KEYW; >> 326 break; >> 327 } >> 328 _APP("(", 1); >> 329 } >> 330 APP; >> 331 lexstate = ST_TYPEOF; >> 332 goto repeat; >> 333 324 case ST_BRACKET: 334 case ST_BRACKET: 325 APP; 335 APP; 326 switch (token) 336 switch (token) 327 { 337 { 328 case '[': 338 case '[': 329 ++count; 339 ++count; 330 goto repeat; 340 goto repeat; 331 case ']': 341 case ']': 332 if (--count == 0) 342 if (--count == 0) 333 { 343 { 334 lexstate = ST_NORMAL; 344 lexstate = ST_NORMAL; 335 token = BRACKET_PHRASE; 345 token = BRACKET_PHRASE; 336 break; 346 break; 337 } 347 } 338 goto repeat; 348 goto repeat; 339 default: 349 default: 340 goto repeat; 350 goto repeat; 341 } 351 } 342 break; 352 break; 343 353 344 case ST_BRACE: 354 case ST_BRACE: 345 APP; 355 APP; 346 switch (token) 356 switch (token) 347 { 357 { 348 case '{': 358 case '{': 349 ++count; 359 ++count; 350 goto repeat; 360 goto repeat; 351 case '}': 361 case '}': 352 if (--count == 0) 362 if (--count == 0) 353 { 363 { 354 lexstate = ST_NORMAL; 364 lexstate = ST_NORMAL; 355 token = BRACE_PHRASE; 365 token = BRACE_PHRASE; 356 break; 366 break; 357 } 367 } 358 goto repeat; 368 goto repeat; 359 default: 369 default: 360 goto repeat; 370 goto repeat; 361 } 371 } 362 break; 372 break; 363 373 364 case ST_EXPRESSION: 374 case ST_EXPRESSION: 365 switch (token) 375 switch (token) 366 { 376 { 367 case '(': case '[': case '{': 377 case '(': case '[': case '{': 368 ++count; 378 ++count; 369 APP; 379 APP; 370 goto repeat; 380 goto repeat; 371 case '}': 381 case '}': 372 /* is this the last line of an enum 382 /* is this the last line of an enum declaration? */ 373 if (count == 0) 383 if (count == 0) 374 { 384 { 375 /* Put back the token we just re 385 /* Put back the token we just read so's we can find it again 376 after registering the express 386 after registering the expression. */ 377 unput(token); 387 unput(token); 378 388 379 lexstate = ST_NORMAL; 389 lexstate = ST_NORMAL; 380 token = EXPRESSION_PHRASE; 390 token = EXPRESSION_PHRASE; 381 break; 391 break; 382 } 392 } 383 /* FALLTHRU */ 393 /* FALLTHRU */ 384 case ')': case ']': 394 case ')': case ']': 385 --count; 395 --count; 386 APP; 396 APP; 387 goto repeat; 397 goto repeat; 388 case ',': case ';': 398 case ',': case ';': 389 if (count == 0) 399 if (count == 0) 390 { 400 { 391 /* Put back the token we just re 401 /* Put back the token we just read so's we can find it again 392 after registering the express 402 after registering the expression. */ 393 unput(token); 403 unput(token); 394 404 395 lexstate = ST_NORMAL; 405 lexstate = ST_NORMAL; 396 token = EXPRESSION_PHRASE; 406 token = EXPRESSION_PHRASE; 397 break; 407 break; 398 } 408 } 399 APP; 409 APP; 400 goto repeat; 410 goto repeat; 401 default: 411 default: 402 APP; 412 APP; 403 goto repeat; 413 goto repeat; 404 } 414 } 405 break; 415 break; 406 416 407 case ST_STATIC_ASSERT: !! 417 case ST_TABLE_1: 408 APP; !! 418 goto repeat; >> 419 >> 420 case ST_TABLE_2: >> 421 if (token == IDENT && yyleng == 1 && yytext[0] == 'X') >> 422 { >> 423 token = EXPORT_SYMBOL_KEYW; >> 424 lexstate = ST_TABLE_5; >> 425 APP; >> 426 break; >> 427 } >> 428 lexstate = ST_TABLE_6; >> 429 /* FALLTHRU */ >> 430 >> 431 case ST_TABLE_6: 409 switch (token) 432 switch (token) 410 { 433 { 411 case '(': !! 434 case '{': case '[': case '(': 412 ++count; 435 ++count; 413 goto repeat; !! 436 break; 414 case ')': !! 437 case '}': case ']': case ')': 415 if (--count == 0) !! 438 --count; 416 { !! 439 break; 417 lexstate = ST_NORMAL; !! 440 case ',': 418 token = STATIC_ASSERT_PHRASE; !! 441 if (count == 0) 419 break; !! 442 lexstate = ST_TABLE_2; 420 } !! 443 break; 421 goto repeat; !! 444 }; >> 445 goto repeat; >> 446 >> 447 case ST_TABLE_3: >> 448 goto repeat; >> 449 >> 450 case ST_TABLE_4: >> 451 if (token == ';') >> 452 lexstate = ST_NORMAL; >> 453 goto repeat; >> 454 >> 455 case ST_TABLE_5: >> 456 switch (token) >> 457 { >> 458 case ',': >> 459 token = ';'; >> 460 lexstate = ST_TABLE_2; >> 461 APP; >> 462 break; 422 default: 463 default: 423 goto repeat; !! 464 APP; >> 465 break; 424 } 466 } 425 break; 467 break; 426 468 427 default: 469 default: 428 exit(1); 470 exit(1); 429 } 471 } 430 fini: 472 fini: 431 473 432 if (suppress_type_lookup > 0) 474 if (suppress_type_lookup > 0) 433 --suppress_type_lookup; 475 --suppress_type_lookup; 434 if (dont_want_brace_phrase > 0) 476 if (dont_want_brace_phrase > 0) 435 --dont_want_brace_phrase; 477 --dont_want_brace_phrase; 436 478 437 yylval = &next_node->next; 479 yylval = &next_node->next; 438 480 439 return token; 481 return token; 440 } 482 }
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.