1 /* SPDX-License-Identifier: GPL-2.0-or-later * !! 1 /* Lexical analysis for genksyms. 2 /* !! 2 Copyright 1996, 1997 Linux International. 3 * Lexical analysis for genksyms. !! 3 4 * Copyright 1996, 1997 Linux International. !! 4 New implementation contributed by Richard Henderson <rth@tamu.edu> 5 * !! 5 Based on original work by Bjorn Ekwall <bj0rn@blox.se> 6 * New implementation contributed by Richard H< !! 6 7 * Based on original work by Bjorn Ekwall <bj0r !! 7 Taken from Linux modutils 2.4.22. 8 * !! 8 9 * Taken from Linux modutils 2.4.22. !! 9 This program is free software; you can redistribute it and/or modify it 10 */ !! 10 under the terms of the GNU General Public License as published by the >> 11 Free Software Foundation; either version 2 of the License, or (at your >> 12 option) any later version. >> 13 >> 14 This program is distributed in the hope that it will be useful, but >> 15 WITHOUT ANY WARRANTY; without even the implied warranty of >> 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> 17 General Public License for more details. >> 18 >> 19 You should have received a copy of the GNU General Public License >> 20 along with this program; if not, write to the Free Software Foundation, >> 21 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ >> 22 11 23 12 %{ 24 %{ 13 25 14 #include <limits.h> 26 #include <limits.h> 15 #include <stdlib.h> 27 #include <stdlib.h> 16 #include <string.h> 28 #include <string.h> 17 #include <ctype.h> 29 #include <ctype.h> 18 30 19 #include "genksyms.h" 31 #include "genksyms.h" 20 #include "parse.tab.h" !! 32 #include "parse.h" 21 33 22 /* We've got a two-level lexer here. We let f 34 /* We've got a two-level lexer here. We let flex do basic tokenization 23 and then we categorize those basic tokens i 35 and then we categorize those basic tokens in the second stage. */ 24 #define YY_DECL static int yylex1(void 36 #define YY_DECL static int yylex1(void) 25 37 26 %} 38 %} 27 39 28 IDENT [A-Za-z_\$][A-Za-z0-9_ 40 IDENT [A-Za-z_\$][A-Za-z0-9_\$]* 29 41 30 O_INT 0[0-7]* 42 O_INT 0[0-7]* 31 D_INT [1-9][0-9]* 43 D_INT [1-9][0-9]* 32 X_INT 0[Xx][0-9A-Fa-f]+ 44 X_INT 0[Xx][0-9A-Fa-f]+ 33 I_SUF [Uu]|[Ll]|[Uu][Ll]|[Ll 45 I_SUF [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu] 34 INT ({O_INT}|{D_INT}|{X_IN 46 INT ({O_INT}|{D_INT}|{X_INT}){I_SUF}? 35 47 36 FRAC ([0-9]*\.[0-9]+)|([0-9 48 FRAC ([0-9]*\.[0-9]+)|([0-9]+\.) 37 EXP [Ee][+-]?[0-9]+ 49 EXP [Ee][+-]?[0-9]+ 38 F_SUF [FfLl] 50 F_SUF [FfLl] 39 REAL ({FRAC}{EXP}?{F_SUF}?) 51 REAL ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?) 40 52 41 STRING L?\"([^\\\"]*\\.)*[^\\ 53 STRING L?\"([^\\\"]*\\.)*[^\\\"]*\" 42 CHAR L?\'([^\\\']*\\.)*[^\\ 54 CHAR L?\'([^\\\']*\\.)*[^\\\']*\' 43 55 44 MC_TOKEN ([~%^&*+=|<>/-]=)|(&&) 56 MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>) 45 57 >> 58 /* Version 2 checksumming does proper tokenization; version 1 wasn't >> 59 quite so pedantic. */ >> 60 %s V2_TOKENS >> 61 46 /* We don't do multiple input files. */ 62 /* We don't do multiple input files. */ 47 %option noyywrap 63 %option noyywrap 48 64 49 %option noinput << 50 << 51 %% 65 %% 52 66 53 67 54 /* Keep track of our location in the original 68 /* Keep track of our location in the original source files. */ 55 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return 69 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return FILENAME; 56 ^#.*\n cur_li 70 ^#.*\n cur_line++; 57 \n cur_li 71 \n cur_line++; 58 72 59 /* Ignore all other whitespace. */ 73 /* Ignore all other whitespace. */ 60 [ \t\f\v\r]+ ; 74 [ \t\f\v\r]+ ; 61 75 62 76 63 {STRING} return 77 {STRING} return STRING; 64 {CHAR} return 78 {CHAR} return CHAR; 65 {IDENT} return 79 {IDENT} return IDENT; 66 80 67 /* The Pedant requires that the other C multi 81 /* The Pedant requires that the other C multi-character tokens be 68 recognized as tokens. We don't actually u 82 recognized as tokens. We don't actually use them since we don't 69 parse expressions, but we do want whitespa 83 parse expressions, but we do want whitespace to be arranged 70 around them properly. */ 84 around them properly. */ 71 {MC_TOKEN} return !! 85 <V2_TOKENS>{MC_TOKEN} return OTHER; 72 {INT} return !! 86 <V2_TOKENS>{INT} return INT; 73 {REAL} return !! 87 <V2_TOKENS>{REAL} return REAL; 74 88 75 "..." return 89 "..." return DOTS; 76 90 77 /* All other tokens are single characters. * 91 /* All other tokens are single characters. */ 78 . return 92 . return yytext[0]; 79 93 80 94 81 %% 95 %% 82 96 83 /* Bring in the keyword recognizer. */ 97 /* Bring in the keyword recognizer. */ 84 98 85 #include "keywords.c" 99 #include "keywords.c" 86 100 87 101 88 /* Macros to append to our phrase collection l 102 /* Macros to append to our phrase collection list. */ 89 103 90 /* << 91 * We mark any token, that that equals to a kn << 92 * SYM_ENUM_CONST. The parser will change this << 93 * the only problem is struct and union member << 94 * enum e { a, b }; struct s { int a, b; } << 95 * but in this case, the only effect will be, << 96 * more volatile, which is acceptable. Also, s << 97 * so far it was only observed in include/linu << 98 */ << 99 #define _APP(T,L) do { 104 #define _APP(T,L) do { \ 100 cur_node = next_node 105 cur_node = next_node; \ 101 next_node = xmalloc( 106 next_node = xmalloc(sizeof(*next_node)); \ 102 next_node->next = cu 107 next_node->next = cur_node; \ 103 cur_node->string = m 108 cur_node->string = memcpy(xmalloc(L+1), T, L+1); \ 104 cur_node->tag = !! 109 cur_node->tag = SYM_NORMAL; \ 105 find_symbol(cur_no << 106 SYM_ENUM_CONST : S << 107 cur_node->in_source_ << 108 } while (0) 110 } while (0) 109 111 110 #define APP _APP(yytext, yyleng) 112 #define APP _APP(yytext, yyleng) 111 113 112 114 113 /* The second stage lexer. Here we incorporat 115 /* The second stage lexer. Here we incorporate knowledge of the state 114 of the parser to tailor the tokens that are 116 of the parser to tailor the tokens that are returned. */ 115 117 116 int 118 int 117 yylex(void) 119 yylex(void) 118 { 120 { 119 static enum { 121 static enum { 120 ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST !! 122 ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE, 121 ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_ST !! 123 ST_EXPRESSION, ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4, >> 124 ST_TABLE_5, ST_TABLE_6 122 } lexstate = ST_NOTSTARTED; 125 } lexstate = ST_NOTSTARTED; 123 126 124 static int suppress_type_lookup, dont_want_b 127 static int suppress_type_lookup, dont_want_brace_phrase; 125 static struct string_list *next_node; 128 static struct string_list *next_node; 126 static char *source_file; << 127 129 128 int token, count = 0; 130 int token, count = 0; 129 struct string_list *cur_node; 131 struct string_list *cur_node; 130 132 131 if (lexstate == ST_NOTSTARTED) 133 if (lexstate == ST_NOTSTARTED) 132 { 134 { >> 135 BEGIN(V2_TOKENS); 133 next_node = xmalloc(sizeof(*next_node)); 136 next_node = xmalloc(sizeof(*next_node)); 134 next_node->next = NULL; 137 next_node->next = NULL; 135 lexstate = ST_NORMAL; 138 lexstate = ST_NORMAL; 136 } 139 } 137 140 138 repeat: 141 repeat: 139 token = yylex1(); 142 token = yylex1(); 140 143 141 if (token == 0) 144 if (token == 0) 142 return 0; 145 return 0; 143 else if (token == FILENAME) 146 else if (token == FILENAME) 144 { 147 { 145 char *file, *e; 148 char *file, *e; 146 149 147 /* Save the filename and line number for 150 /* Save the filename and line number for later error messages. */ 148 151 149 if (cur_filename) 152 if (cur_filename) 150 free(cur_filename); 153 free(cur_filename); 151 154 152 file = strchr(yytext, '\"')+1; 155 file = strchr(yytext, '\"')+1; 153 e = strchr(file, '\"'); 156 e = strchr(file, '\"'); 154 *e = '\0'; 157 *e = '\0'; 155 cur_filename = memcpy(xmalloc(e-file+1), 158 cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1); 156 cur_line = atoi(yytext+2); 159 cur_line = atoi(yytext+2); 157 160 158 if (!source_file) { << 159 source_file = xstrdup(cur_filename); << 160 in_source_file = 1; << 161 } else { << 162 in_source_file = (strcmp(cur_filename, << 163 } << 164 << 165 goto repeat; 161 goto repeat; 166 } 162 } 167 163 168 switch (lexstate) 164 switch (lexstate) 169 { 165 { 170 case ST_NORMAL: 166 case ST_NORMAL: 171 switch (token) 167 switch (token) 172 { 168 { 173 case IDENT: 169 case IDENT: 174 APP; 170 APP; 175 { 171 { 176 int r = is_reserved_word(yytext, y !! 172 const struct resword *r = is_reserved_word(yytext, yyleng); 177 if (r >= 0) !! 173 if (r) 178 { 174 { 179 switch (token = r) !! 175 switch (token = r->token) 180 { 176 { 181 case ATTRIBUTE_KEYW: 177 case ATTRIBUTE_KEYW: 182 lexstate = ST_ATTRIBUTE; 178 lexstate = ST_ATTRIBUTE; 183 count = 0; 179 count = 0; 184 goto repeat; 180 goto repeat; 185 case ASM_KEYW: 181 case ASM_KEYW: 186 lexstate = ST_ASM; 182 lexstate = ST_ASM; 187 count = 0; 183 count = 0; 188 goto repeat; 184 goto repeat; 189 case TYPEOF_KEYW: << 190 lexstate = ST_TYPEOF; << 191 count = 0; << 192 goto repeat; << 193 185 194 case STRUCT_KEYW: 186 case STRUCT_KEYW: 195 case UNION_KEYW: 187 case UNION_KEYW: 196 case ENUM_KEYW: << 197 dont_want_brace_phrase = 3 188 dont_want_brace_phrase = 3; >> 189 case ENUM_KEYW: 198 suppress_type_lookup = 2; 190 suppress_type_lookup = 2; 199 goto fini; 191 goto fini; 200 192 201 case EXPORT_SYMBOL_KEYW: 193 case EXPORT_SYMBOL_KEYW: 202 goto fini; 194 goto fini; 203 << 204 case STATIC_ASSERT_KEYW: << 205 lexstate = ST_STATIC_ASSER << 206 count = 0; << 207 goto repeat; << 208 } 195 } 209 } 196 } 210 if (!suppress_type_lookup) 197 if (!suppress_type_lookup) 211 { 198 { 212 if (find_symbol(yytext, SYM_TY !! 199 struct symbol *sym = find_symbol(yytext, SYM_TYPEDEF); >> 200 if (sym && sym->type == SYM_TYPEDEF) 213 token = TYPE; 201 token = TYPE; 214 } 202 } 215 } 203 } 216 break; 204 break; 217 205 218 case '[': 206 case '[': 219 APP; 207 APP; 220 lexstate = ST_BRACKET; 208 lexstate = ST_BRACKET; 221 count = 1; 209 count = 1; 222 goto repeat; 210 goto repeat; 223 211 224 case '{': 212 case '{': 225 APP; 213 APP; 226 if (dont_want_brace_phrase) 214 if (dont_want_brace_phrase) 227 break; 215 break; 228 lexstate = ST_BRACE; 216 lexstate = ST_BRACE; 229 count = 1; 217 count = 1; 230 goto repeat; 218 goto repeat; 231 219 232 case '=': case ':': 220 case '=': case ':': 233 APP; 221 APP; 234 lexstate = ST_EXPRESSION; 222 lexstate = ST_EXPRESSION; 235 break; 223 break; 236 224 >> 225 case DOTS: 237 default: 226 default: 238 APP; 227 APP; 239 break; 228 break; 240 } 229 } 241 break; 230 break; 242 231 243 case ST_ATTRIBUTE: 232 case ST_ATTRIBUTE: 244 APP; 233 APP; 245 switch (token) 234 switch (token) 246 { 235 { 247 case '(': 236 case '(': 248 ++count; 237 ++count; 249 goto repeat; 238 goto repeat; 250 case ')': 239 case ')': 251 if (--count == 0) 240 if (--count == 0) 252 { 241 { 253 lexstate = ST_NORMAL; 242 lexstate = ST_NORMAL; 254 token = ATTRIBUTE_PHRASE; 243 token = ATTRIBUTE_PHRASE; 255 break; 244 break; 256 } 245 } 257 goto repeat; 246 goto repeat; 258 default: 247 default: 259 goto repeat; 248 goto repeat; 260 } 249 } 261 break; 250 break; 262 251 263 case ST_ASM: 252 case ST_ASM: 264 APP; 253 APP; 265 switch (token) 254 switch (token) 266 { 255 { 267 case '(': 256 case '(': 268 ++count; 257 ++count; 269 goto repeat; 258 goto repeat; 270 case ')': 259 case ')': 271 if (--count == 0) 260 if (--count == 0) 272 { 261 { 273 lexstate = ST_NORMAL; 262 lexstate = ST_NORMAL; 274 token = ASM_PHRASE; 263 token = ASM_PHRASE; 275 break; 264 break; 276 } 265 } 277 goto repeat; 266 goto repeat; 278 default: 267 default: 279 goto repeat; 268 goto repeat; 280 } 269 } 281 break; 270 break; 282 271 283 case ST_TYPEOF_1: << 284 if (token == IDENT) << 285 { << 286 if (is_reserved_word(yytext, yyleng) << 287 || find_symbol(yytext, SYM_TYPED << 288 { << 289 yyless(0); << 290 unput('('); << 291 lexstate = ST_NORMAL; << 292 token = TYPEOF_KEYW; << 293 break; << 294 } << 295 _APP("(", 1); << 296 } << 297 lexstate = ST_TYPEOF; << 298 /* FALLTHRU */ << 299 << 300 case ST_TYPEOF: << 301 switch (token) << 302 { << 303 case '(': << 304 if ( ++count == 1 ) << 305 lexstate = ST_TYPEOF_1; << 306 else << 307 APP; << 308 goto repeat; << 309 case ')': << 310 APP; << 311 if (--count == 0) << 312 { << 313 lexstate = ST_NORMAL; << 314 token = TYPEOF_PHRASE; << 315 break; << 316 } << 317 goto repeat; << 318 default: << 319 APP; << 320 goto repeat; << 321 } << 322 break; << 323 << 324 case ST_BRACKET: 272 case ST_BRACKET: 325 APP; 273 APP; 326 switch (token) 274 switch (token) 327 { 275 { 328 case '[': 276 case '[': 329 ++count; 277 ++count; 330 goto repeat; 278 goto repeat; 331 case ']': 279 case ']': 332 if (--count == 0) 280 if (--count == 0) 333 { 281 { 334 lexstate = ST_NORMAL; 282 lexstate = ST_NORMAL; 335 token = BRACKET_PHRASE; 283 token = BRACKET_PHRASE; 336 break; 284 break; 337 } 285 } 338 goto repeat; 286 goto repeat; 339 default: 287 default: 340 goto repeat; 288 goto repeat; 341 } 289 } 342 break; 290 break; 343 291 344 case ST_BRACE: 292 case ST_BRACE: 345 APP; 293 APP; 346 switch (token) 294 switch (token) 347 { 295 { 348 case '{': 296 case '{': 349 ++count; 297 ++count; 350 goto repeat; 298 goto repeat; 351 case '}': 299 case '}': 352 if (--count == 0) 300 if (--count == 0) 353 { 301 { 354 lexstate = ST_NORMAL; 302 lexstate = ST_NORMAL; 355 token = BRACE_PHRASE; 303 token = BRACE_PHRASE; 356 break; 304 break; 357 } 305 } 358 goto repeat; 306 goto repeat; 359 default: 307 default: 360 goto repeat; 308 goto repeat; 361 } 309 } 362 break; 310 break; 363 311 364 case ST_EXPRESSION: 312 case ST_EXPRESSION: 365 switch (token) 313 switch (token) 366 { 314 { 367 case '(': case '[': case '{': 315 case '(': case '[': case '{': 368 ++count; 316 ++count; 369 APP; 317 APP; 370 goto repeat; 318 goto repeat; 371 case '}': !! 319 case ')': case ']': case '}': 372 /* is this the last line of an enum << 373 if (count == 0) << 374 { << 375 /* Put back the token we just re << 376 after registering the express << 377 unput(token); << 378 << 379 lexstate = ST_NORMAL; << 380 token = EXPRESSION_PHRASE; << 381 break; << 382 } << 383 /* FALLTHRU */ << 384 case ')': case ']': << 385 --count; 320 --count; 386 APP; 321 APP; 387 goto repeat; 322 goto repeat; 388 case ',': case ';': 323 case ',': case ';': 389 if (count == 0) 324 if (count == 0) 390 { 325 { 391 /* Put back the token we just re 326 /* Put back the token we just read so's we can find it again 392 after registering the express 327 after registering the expression. */ 393 unput(token); 328 unput(token); 394 329 395 lexstate = ST_NORMAL; 330 lexstate = ST_NORMAL; 396 token = EXPRESSION_PHRASE; 331 token = EXPRESSION_PHRASE; 397 break; 332 break; 398 } 333 } 399 APP; 334 APP; 400 goto repeat; 335 goto repeat; 401 default: 336 default: 402 APP; 337 APP; 403 goto repeat; 338 goto repeat; 404 } 339 } 405 break; 340 break; 406 341 407 case ST_STATIC_ASSERT: !! 342 case ST_TABLE_1: 408 APP; !! 343 goto repeat; >> 344 >> 345 case ST_TABLE_2: >> 346 if (token == IDENT && yyleng == 1 && yytext[0] == 'X') >> 347 { >> 348 token = EXPORT_SYMBOL_KEYW; >> 349 lexstate = ST_TABLE_5; >> 350 APP; >> 351 break; >> 352 } >> 353 lexstate = ST_TABLE_6; >> 354 /* FALLTHRU */ >> 355 >> 356 case ST_TABLE_6: 409 switch (token) 357 switch (token) 410 { 358 { 411 case '(': !! 359 case '{': case '[': case '(': 412 ++count; 360 ++count; 413 goto repeat; !! 361 break; 414 case ')': !! 362 case '}': case ']': case ')': 415 if (--count == 0) !! 363 --count; 416 { !! 364 break; 417 lexstate = ST_NORMAL; !! 365 case ',': 418 token = STATIC_ASSERT_PHRASE; !! 366 if (count == 0) 419 break; !! 367 lexstate = ST_TABLE_2; 420 } !! 368 break; 421 goto repeat; !! 369 }; >> 370 goto repeat; >> 371 >> 372 case ST_TABLE_3: >> 373 goto repeat; >> 374 >> 375 case ST_TABLE_4: >> 376 if (token == ';') >> 377 lexstate = ST_NORMAL; >> 378 goto repeat; >> 379 >> 380 case ST_TABLE_5: >> 381 switch (token) >> 382 { >> 383 case ',': >> 384 token = ';'; >> 385 lexstate = ST_TABLE_2; >> 386 APP; >> 387 break; 422 default: 388 default: 423 goto repeat; !! 389 APP; >> 390 break; 424 } 391 } 425 break; 392 break; 426 393 427 default: 394 default: 428 exit(1); !! 395 abort(); 429 } 396 } 430 fini: 397 fini: 431 398 432 if (suppress_type_lookup > 0) 399 if (suppress_type_lookup > 0) 433 --suppress_type_lookup; 400 --suppress_type_lookup; 434 if (dont_want_brace_phrase > 0) 401 if (dont_want_brace_phrase > 0) 435 --dont_want_brace_phrase; 402 --dont_want_brace_phrase; 436 403 437 yylval = &next_node->next; 404 yylval = &next_node->next; 438 405 439 return token; 406 return token; 440 } 407 }
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.