1 /* SPDX-License-Identifier: GPL-2.0-or-later * !! 1 /* Lexical analysis for genksyms. 2 /* !! 2 Copyright 1996, 1997 Linux International. 3 * Lexical analysis for genksyms. !! 3 4 * Copyright 1996, 1997 Linux International. !! 4 New implementation contributed by Richard Henderson <rth@tamu.edu> 5 * !! 5 Based on original work by Bjorn Ekwall <bj0rn@blox.se> 6 * New implementation contributed by Richard H< !! 6 7 * Based on original work by Bjorn Ekwall <bj0r !! 7 Taken from Linux modutils 2.4.22. 8 * !! 8 9 * Taken from Linux modutils 2.4.22. !! 9 This program is free software; you can redistribute it and/or modify it 10 */ !! 10 under the terms of the GNU General Public License as published by the >> 11 Free Software Foundation; either version 2 of the License, or (at your >> 12 option) any later version. >> 13 >> 14 This program is distributed in the hope that it will be useful, but >> 15 WITHOUT ANY WARRANTY; without even the implied warranty of >> 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> 17 General Public License for more details. >> 18 >> 19 You should have received a copy of the GNU General Public License >> 20 along with this program; if not, write to the Free Software Foundation, >> 21 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ >> 22 11 23 12 %{ 24 %{ 13 25 14 #include <limits.h> 26 #include <limits.h> 15 #include <stdlib.h> 27 #include <stdlib.h> 16 #include <string.h> 28 #include <string.h> 17 #include <ctype.h> 29 #include <ctype.h> 18 30 19 #include "genksyms.h" 31 #include "genksyms.h" 20 #include "parse.tab.h" 32 #include "parse.tab.h" 21 33 22 /* We've got a two-level lexer here. We let f 34 /* We've got a two-level lexer here. We let flex do basic tokenization 23 and then we categorize those basic tokens i 35 and then we categorize those basic tokens in the second stage. */ 24 #define YY_DECL static int yylex1(void 36 #define YY_DECL static int yylex1(void) 25 37 26 %} 38 %} 27 39 28 IDENT [A-Za-z_\$][A-Za-z0-9_ 40 IDENT [A-Za-z_\$][A-Za-z0-9_\$]* 29 41 30 O_INT 0[0-7]* 42 O_INT 0[0-7]* 31 D_INT [1-9][0-9]* 43 D_INT [1-9][0-9]* 32 X_INT 0[Xx][0-9A-Fa-f]+ 44 X_INT 0[Xx][0-9A-Fa-f]+ 33 I_SUF [Uu]|[Ll]|[Uu][Ll]|[Ll 45 I_SUF [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu] 34 INT ({O_INT}|{D_INT}|{X_IN 46 INT ({O_INT}|{D_INT}|{X_INT}){I_SUF}? 35 47 36 FRAC ([0-9]*\.[0-9]+)|([0-9 48 FRAC ([0-9]*\.[0-9]+)|([0-9]+\.) 37 EXP [Ee][+-]?[0-9]+ 49 EXP [Ee][+-]?[0-9]+ 38 F_SUF [FfLl] 50 F_SUF [FfLl] 39 REAL ({FRAC}{EXP}?{F_SUF}?) 51 REAL ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?) 40 52 41 STRING L?\"([^\\\"]*\\.)*[^\\ 53 STRING L?\"([^\\\"]*\\.)*[^\\\"]*\" 42 CHAR L?\'([^\\\']*\\.)*[^\\ 54 CHAR L?\'([^\\\']*\\.)*[^\\\']*\' 43 55 44 MC_TOKEN ([~%^&*+=|<>/-]=)|(&&) 56 MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>) 45 57 46 /* We don't do multiple input files. */ 58 /* We don't do multiple input files. */ 47 %option noyywrap 59 %option noyywrap 48 60 49 %option noinput 61 %option noinput 50 62 51 %% 63 %% 52 64 53 65 54 /* Keep track of our location in the original 66 /* Keep track of our location in the original source files. */ 55 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return 67 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return FILENAME; 56 ^#.*\n cur_li 68 ^#.*\n cur_line++; 57 \n cur_li 69 \n cur_line++; 58 70 59 /* Ignore all other whitespace. */ 71 /* Ignore all other whitespace. */ 60 [ \t\f\v\r]+ ; 72 [ \t\f\v\r]+ ; 61 73 62 74 63 {STRING} return 75 {STRING} return STRING; 64 {CHAR} return 76 {CHAR} return CHAR; 65 {IDENT} return 77 {IDENT} return IDENT; 66 78 67 /* The Pedant requires that the other C multi 79 /* The Pedant requires that the other C multi-character tokens be 68 recognized as tokens. We don't actually u 80 recognized as tokens. We don't actually use them since we don't 69 parse expressions, but we do want whitespa 81 parse expressions, but we do want whitespace to be arranged 70 around them properly. */ 82 around them properly. */ 71 {MC_TOKEN} return 83 {MC_TOKEN} return OTHER; 72 {INT} return 84 {INT} return INT; 73 {REAL} return 85 {REAL} return REAL; 74 86 75 "..." return 87 "..." return DOTS; 76 88 77 /* All other tokens are single characters. * 89 /* All other tokens are single characters. */ 78 . return 90 . return yytext[0]; 79 91 80 92 81 %% 93 %% 82 94 83 /* Bring in the keyword recognizer. */ 95 /* Bring in the keyword recognizer. */ 84 96 85 #include "keywords.c" 97 #include "keywords.c" 86 98 87 99 88 /* Macros to append to our phrase collection l 100 /* Macros to append to our phrase collection list. */ 89 101 90 /* 102 /* 91 * We mark any token, that that equals to a kn 103 * We mark any token, that that equals to a known enumerator, as 92 * SYM_ENUM_CONST. The parser will change this 104 * SYM_ENUM_CONST. The parser will change this for struct and union tags later, 93 * the only problem is struct and union member 105 * the only problem is struct and union members: 94 * enum e { a, b }; struct s { int a, b; } 106 * enum e { a, b }; struct s { int a, b; } 95 * but in this case, the only effect will be, 107 * but in this case, the only effect will be, that the ABI checksums become 96 * more volatile, which is acceptable. Also, s 108 * more volatile, which is acceptable. Also, such collisions are quite rare, 97 * so far it was only observed in include/linu 109 * so far it was only observed in include/linux/telephony.h. 98 */ 110 */ 99 #define _APP(T,L) do { 111 #define _APP(T,L) do { \ 100 cur_node = next_node 112 cur_node = next_node; \ 101 next_node = xmalloc( 113 next_node = xmalloc(sizeof(*next_node)); \ 102 next_node->next = cu 114 next_node->next = cur_node; \ 103 cur_node->string = m 115 cur_node->string = memcpy(xmalloc(L+1), T, L+1); \ 104 cur_node->tag = 116 cur_node->tag = \ 105 find_symbol(cur_no 117 find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\ 106 SYM_ENUM_CONST : S 118 SYM_ENUM_CONST : SYM_NORMAL ; \ 107 cur_node->in_source_ 119 cur_node->in_source_file = in_source_file; \ 108 } while (0) 120 } while (0) 109 121 110 #define APP _APP(yytext, yyleng) 122 #define APP _APP(yytext, yyleng) 111 123 112 124 113 /* The second stage lexer. Here we incorporat 125 /* The second stage lexer. Here we incorporate knowledge of the state 114 of the parser to tailor the tokens that are 126 of the parser to tailor the tokens that are returned. */ 115 127 116 int 128 int 117 yylex(void) 129 yylex(void) 118 { 130 { 119 static enum { 131 static enum { 120 ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST 132 ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1, 121 ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_ST !! 133 ST_BRACKET, ST_BRACE, ST_EXPRESSION, >> 134 ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4, >> 135 ST_TABLE_5, ST_TABLE_6 122 } lexstate = ST_NOTSTARTED; 136 } lexstate = ST_NOTSTARTED; 123 137 124 static int suppress_type_lookup, dont_want_b 138 static int suppress_type_lookup, dont_want_brace_phrase; 125 static struct string_list *next_node; 139 static struct string_list *next_node; 126 static char *source_file; << 127 140 128 int token, count = 0; 141 int token, count = 0; 129 struct string_list *cur_node; 142 struct string_list *cur_node; 130 143 131 if (lexstate == ST_NOTSTARTED) 144 if (lexstate == ST_NOTSTARTED) 132 { 145 { 133 next_node = xmalloc(sizeof(*next_node)); 146 next_node = xmalloc(sizeof(*next_node)); 134 next_node->next = NULL; 147 next_node->next = NULL; 135 lexstate = ST_NORMAL; 148 lexstate = ST_NORMAL; 136 } 149 } 137 150 138 repeat: 151 repeat: 139 token = yylex1(); 152 token = yylex1(); 140 153 141 if (token == 0) 154 if (token == 0) 142 return 0; 155 return 0; 143 else if (token == FILENAME) 156 else if (token == FILENAME) 144 { 157 { 145 char *file, *e; 158 char *file, *e; 146 159 147 /* Save the filename and line number for 160 /* Save the filename and line number for later error messages. */ 148 161 149 if (cur_filename) 162 if (cur_filename) 150 free(cur_filename); 163 free(cur_filename); 151 164 152 file = strchr(yytext, '\"')+1; 165 file = strchr(yytext, '\"')+1; 153 e = strchr(file, '\"'); 166 e = strchr(file, '\"'); 154 *e = '\0'; 167 *e = '\0'; 155 cur_filename = memcpy(xmalloc(e-file+1), 168 cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1); 156 cur_line = atoi(yytext+2); 169 cur_line = atoi(yytext+2); 157 170 158 if (!source_file) { 171 if (!source_file) { 159 source_file = xstrdup(cur_filename); 172 source_file = xstrdup(cur_filename); 160 in_source_file = 1; 173 in_source_file = 1; 161 } else { 174 } else { 162 in_source_file = (strcmp(cur_filename, 175 in_source_file = (strcmp(cur_filename, source_file) == 0); 163 } 176 } 164 177 165 goto repeat; 178 goto repeat; 166 } 179 } 167 180 168 switch (lexstate) 181 switch (lexstate) 169 { 182 { 170 case ST_NORMAL: 183 case ST_NORMAL: 171 switch (token) 184 switch (token) 172 { 185 { 173 case IDENT: 186 case IDENT: 174 APP; 187 APP; 175 { 188 { 176 int r = is_reserved_word(yytext, y 189 int r = is_reserved_word(yytext, yyleng); 177 if (r >= 0) 190 if (r >= 0) 178 { 191 { 179 switch (token = r) 192 switch (token = r) 180 { 193 { 181 case ATTRIBUTE_KEYW: 194 case ATTRIBUTE_KEYW: 182 lexstate = ST_ATTRIBUTE; 195 lexstate = ST_ATTRIBUTE; 183 count = 0; 196 count = 0; 184 goto repeat; 197 goto repeat; 185 case ASM_KEYW: 198 case ASM_KEYW: 186 lexstate = ST_ASM; 199 lexstate = ST_ASM; 187 count = 0; 200 count = 0; 188 goto repeat; 201 goto repeat; 189 case TYPEOF_KEYW: 202 case TYPEOF_KEYW: 190 lexstate = ST_TYPEOF; 203 lexstate = ST_TYPEOF; 191 count = 0; 204 count = 0; 192 goto repeat; 205 goto repeat; 193 206 194 case STRUCT_KEYW: 207 case STRUCT_KEYW: 195 case UNION_KEYW: 208 case UNION_KEYW: 196 case ENUM_KEYW: 209 case ENUM_KEYW: 197 dont_want_brace_phrase = 3 210 dont_want_brace_phrase = 3; 198 suppress_type_lookup = 2; 211 suppress_type_lookup = 2; 199 goto fini; 212 goto fini; 200 213 201 case EXPORT_SYMBOL_KEYW: 214 case EXPORT_SYMBOL_KEYW: 202 goto fini; 215 goto fini; 203 << 204 case STATIC_ASSERT_KEYW: << 205 lexstate = ST_STATIC_ASSER << 206 count = 0; << 207 goto repeat; << 208 } 216 } 209 } 217 } 210 if (!suppress_type_lookup) 218 if (!suppress_type_lookup) 211 { 219 { 212 if (find_symbol(yytext, SYM_TY 220 if (find_symbol(yytext, SYM_TYPEDEF, 1)) 213 token = TYPE; 221 token = TYPE; 214 } 222 } 215 } 223 } 216 break; 224 break; 217 225 218 case '[': 226 case '[': 219 APP; 227 APP; 220 lexstate = ST_BRACKET; 228 lexstate = ST_BRACKET; 221 count = 1; 229 count = 1; 222 goto repeat; 230 goto repeat; 223 231 224 case '{': 232 case '{': 225 APP; 233 APP; 226 if (dont_want_brace_phrase) 234 if (dont_want_brace_phrase) 227 break; 235 break; 228 lexstate = ST_BRACE; 236 lexstate = ST_BRACE; 229 count = 1; 237 count = 1; 230 goto repeat; 238 goto repeat; 231 239 232 case '=': case ':': 240 case '=': case ':': 233 APP; 241 APP; 234 lexstate = ST_EXPRESSION; 242 lexstate = ST_EXPRESSION; 235 break; 243 break; 236 244 >> 245 case DOTS: 237 default: 246 default: 238 APP; 247 APP; 239 break; 248 break; 240 } 249 } 241 break; 250 break; 242 251 243 case ST_ATTRIBUTE: 252 case ST_ATTRIBUTE: 244 APP; 253 APP; 245 switch (token) 254 switch (token) 246 { 255 { 247 case '(': 256 case '(': 248 ++count; 257 ++count; 249 goto repeat; 258 goto repeat; 250 case ')': 259 case ')': 251 if (--count == 0) 260 if (--count == 0) 252 { 261 { 253 lexstate = ST_NORMAL; 262 lexstate = ST_NORMAL; 254 token = ATTRIBUTE_PHRASE; 263 token = ATTRIBUTE_PHRASE; 255 break; 264 break; 256 } 265 } 257 goto repeat; 266 goto repeat; 258 default: 267 default: 259 goto repeat; 268 goto repeat; 260 } 269 } 261 break; 270 break; 262 271 263 case ST_ASM: 272 case ST_ASM: 264 APP; 273 APP; 265 switch (token) 274 switch (token) 266 { 275 { 267 case '(': 276 case '(': 268 ++count; 277 ++count; 269 goto repeat; 278 goto repeat; 270 case ')': 279 case ')': 271 if (--count == 0) 280 if (--count == 0) 272 { 281 { 273 lexstate = ST_NORMAL; 282 lexstate = ST_NORMAL; 274 token = ASM_PHRASE; 283 token = ASM_PHRASE; 275 break; 284 break; 276 } 285 } 277 goto repeat; 286 goto repeat; 278 default: 287 default: 279 goto repeat; 288 goto repeat; 280 } 289 } 281 break; 290 break; 282 291 283 case ST_TYPEOF_1: 292 case ST_TYPEOF_1: 284 if (token == IDENT) 293 if (token == IDENT) 285 { 294 { 286 if (is_reserved_word(yytext, yyleng) 295 if (is_reserved_word(yytext, yyleng) >= 0 287 || find_symbol(yytext, SYM_TYPED 296 || find_symbol(yytext, SYM_TYPEDEF, 1)) 288 { 297 { 289 yyless(0); 298 yyless(0); 290 unput('('); 299 unput('('); 291 lexstate = ST_NORMAL; 300 lexstate = ST_NORMAL; 292 token = TYPEOF_KEYW; 301 token = TYPEOF_KEYW; 293 break; 302 break; 294 } 303 } 295 _APP("(", 1); 304 _APP("(", 1); 296 } 305 } 297 lexstate = ST_TYPEOF; 306 lexstate = ST_TYPEOF; 298 /* FALLTHRU */ 307 /* FALLTHRU */ 299 308 300 case ST_TYPEOF: 309 case ST_TYPEOF: 301 switch (token) 310 switch (token) 302 { 311 { 303 case '(': 312 case '(': 304 if ( ++count == 1 ) 313 if ( ++count == 1 ) 305 lexstate = ST_TYPEOF_1; 314 lexstate = ST_TYPEOF_1; 306 else 315 else 307 APP; 316 APP; 308 goto repeat; 317 goto repeat; 309 case ')': 318 case ')': 310 APP; 319 APP; 311 if (--count == 0) 320 if (--count == 0) 312 { 321 { 313 lexstate = ST_NORMAL; 322 lexstate = ST_NORMAL; 314 token = TYPEOF_PHRASE; 323 token = TYPEOF_PHRASE; 315 break; 324 break; 316 } 325 } 317 goto repeat; 326 goto repeat; 318 default: 327 default: 319 APP; 328 APP; 320 goto repeat; 329 goto repeat; 321 } 330 } 322 break; 331 break; 323 332 324 case ST_BRACKET: 333 case ST_BRACKET: 325 APP; 334 APP; 326 switch (token) 335 switch (token) 327 { 336 { 328 case '[': 337 case '[': 329 ++count; 338 ++count; 330 goto repeat; 339 goto repeat; 331 case ']': 340 case ']': 332 if (--count == 0) 341 if (--count == 0) 333 { 342 { 334 lexstate = ST_NORMAL; 343 lexstate = ST_NORMAL; 335 token = BRACKET_PHRASE; 344 token = BRACKET_PHRASE; 336 break; 345 break; 337 } 346 } 338 goto repeat; 347 goto repeat; 339 default: 348 default: 340 goto repeat; 349 goto repeat; 341 } 350 } 342 break; 351 break; 343 352 344 case ST_BRACE: 353 case ST_BRACE: 345 APP; 354 APP; 346 switch (token) 355 switch (token) 347 { 356 { 348 case '{': 357 case '{': 349 ++count; 358 ++count; 350 goto repeat; 359 goto repeat; 351 case '}': 360 case '}': 352 if (--count == 0) 361 if (--count == 0) 353 { 362 { 354 lexstate = ST_NORMAL; 363 lexstate = ST_NORMAL; 355 token = BRACE_PHRASE; 364 token = BRACE_PHRASE; 356 break; 365 break; 357 } 366 } 358 goto repeat; 367 goto repeat; 359 default: 368 default: 360 goto repeat; 369 goto repeat; 361 } 370 } 362 break; 371 break; 363 372 364 case ST_EXPRESSION: 373 case ST_EXPRESSION: 365 switch (token) 374 switch (token) 366 { 375 { 367 case '(': case '[': case '{': 376 case '(': case '[': case '{': 368 ++count; 377 ++count; 369 APP; 378 APP; 370 goto repeat; 379 goto repeat; 371 case '}': 380 case '}': 372 /* is this the last line of an enum 381 /* is this the last line of an enum declaration? */ 373 if (count == 0) 382 if (count == 0) 374 { 383 { 375 /* Put back the token we just re 384 /* Put back the token we just read so's we can find it again 376 after registering the express 385 after registering the expression. */ 377 unput(token); 386 unput(token); 378 387 379 lexstate = ST_NORMAL; 388 lexstate = ST_NORMAL; 380 token = EXPRESSION_PHRASE; 389 token = EXPRESSION_PHRASE; 381 break; 390 break; 382 } 391 } 383 /* FALLTHRU */ 392 /* FALLTHRU */ 384 case ')': case ']': 393 case ')': case ']': 385 --count; 394 --count; 386 APP; 395 APP; 387 goto repeat; 396 goto repeat; 388 case ',': case ';': 397 case ',': case ';': 389 if (count == 0) 398 if (count == 0) 390 { 399 { 391 /* Put back the token we just re 400 /* Put back the token we just read so's we can find it again 392 after registering the express 401 after registering the expression. */ 393 unput(token); 402 unput(token); 394 403 395 lexstate = ST_NORMAL; 404 lexstate = ST_NORMAL; 396 token = EXPRESSION_PHRASE; 405 token = EXPRESSION_PHRASE; 397 break; 406 break; 398 } 407 } 399 APP; 408 APP; 400 goto repeat; 409 goto repeat; 401 default: 410 default: 402 APP; 411 APP; 403 goto repeat; 412 goto repeat; 404 } 413 } 405 break; 414 break; 406 415 407 case ST_STATIC_ASSERT: !! 416 case ST_TABLE_1: 408 APP; !! 417 goto repeat; >> 418 >> 419 case ST_TABLE_2: >> 420 if (token == IDENT && yyleng == 1 && yytext[0] == 'X') >> 421 { >> 422 token = EXPORT_SYMBOL_KEYW; >> 423 lexstate = ST_TABLE_5; >> 424 APP; >> 425 break; >> 426 } >> 427 lexstate = ST_TABLE_6; >> 428 /* FALLTHRU */ >> 429 >> 430 case ST_TABLE_6: 409 switch (token) 431 switch (token) 410 { 432 { 411 case '(': !! 433 case '{': case '[': case '(': 412 ++count; 434 ++count; 413 goto repeat; !! 435 break; 414 case ')': !! 436 case '}': case ']': case ')': 415 if (--count == 0) !! 437 --count; 416 { !! 438 break; 417 lexstate = ST_NORMAL; !! 439 case ',': 418 token = STATIC_ASSERT_PHRASE; !! 440 if (count == 0) 419 break; !! 441 lexstate = ST_TABLE_2; 420 } !! 442 break; 421 goto repeat; !! 443 }; >> 444 goto repeat; >> 445 >> 446 case ST_TABLE_3: >> 447 goto repeat; >> 448 >> 449 case ST_TABLE_4: >> 450 if (token == ';') >> 451 lexstate = ST_NORMAL; >> 452 goto repeat; >> 453 >> 454 case ST_TABLE_5: >> 455 switch (token) >> 456 { >> 457 case ',': >> 458 token = ';'; >> 459 lexstate = ST_TABLE_2; >> 460 APP; >> 461 break; 422 default: 462 default: 423 goto repeat; !! 463 APP; >> 464 break; 424 } 465 } 425 break; 466 break; 426 467 427 default: 468 default: 428 exit(1); 469 exit(1); 429 } 470 } 430 fini: 471 fini: 431 472 432 if (suppress_type_lookup > 0) 473 if (suppress_type_lookup > 0) 433 --suppress_type_lookup; 474 --suppress_type_lookup; 434 if (dont_want_brace_phrase > 0) 475 if (dont_want_brace_phrase > 0) 435 --dont_want_brace_phrase; 476 --dont_want_brace_phrase; 436 477 437 yylval = &next_node->next; 478 yylval = &next_node->next; 438 479 439 return token; 480 return token; 440 } 481 }
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.