1 /* SPDX-License-Identifier: GPL-2.0-or-later * !! 1 /* Lexical analysis for genksyms. 2 /* !! 2 Copyright 1996, 1997 Linux International. 3 * Lexical analysis for genksyms. !! 3 4 * Copyright 1996, 1997 Linux International. !! 4 New implementation contributed by Richard Henderson <rth@tamu.edu> 5 * !! 5 Based on original work by Bjorn Ekwall <bj0rn@blox.se> 6 * New implementation contributed by Richard H< !! 6 7 * Based on original work by Bjorn Ekwall <bj0r !! 7 Taken from Linux modutils 2.4.22. 8 * !! 8 9 * Taken from Linux modutils 2.4.22. !! 9 This program is free software; you can redistribute it and/or modify it 10 */ !! 10 under the terms of the GNU General Public License as published by the >> 11 Free Software Foundation; either version 2 of the License, or (at your >> 12 option) any later version. >> 13 >> 14 This program is distributed in the hope that it will be useful, but >> 15 WITHOUT ANY WARRANTY; without even the implied warranty of >> 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> 17 General Public License for more details. >> 18 >> 19 You should have received a copy of the GNU General Public License >> 20 along with this program; if not, write to the Free Software Foundation, >> 21 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ >> 22 11 23 12 %{ 24 %{ 13 25 14 #include <limits.h> 26 #include <limits.h> 15 #include <stdlib.h> 27 #include <stdlib.h> 16 #include <string.h> 28 #include <string.h> 17 #include <ctype.h> 29 #include <ctype.h> 18 30 19 #include "genksyms.h" 31 #include "genksyms.h" 20 #include "parse.tab.h" !! 32 #include "parse.h" 21 33 22 /* We've got a two-level lexer here. We let f 34 /* We've got a two-level lexer here. We let flex do basic tokenization 23 and then we categorize those basic tokens i 35 and then we categorize those basic tokens in the second stage. */ 24 #define YY_DECL static int yylex1(void 36 #define YY_DECL static int yylex1(void) 25 37 26 %} 38 %} 27 39 28 IDENT [A-Za-z_\$][A-Za-z0-9_ 40 IDENT [A-Za-z_\$][A-Za-z0-9_\$]* 29 41 30 O_INT 0[0-7]* 42 O_INT 0[0-7]* 31 D_INT [1-9][0-9]* 43 D_INT [1-9][0-9]* 32 X_INT 0[Xx][0-9A-Fa-f]+ 44 X_INT 0[Xx][0-9A-Fa-f]+ 33 I_SUF [Uu]|[Ll]|[Uu][Ll]|[Ll 45 I_SUF [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu] 34 INT ({O_INT}|{D_INT}|{X_IN 46 INT ({O_INT}|{D_INT}|{X_INT}){I_SUF}? 35 47 36 FRAC ([0-9]*\.[0-9]+)|([0-9 48 FRAC ([0-9]*\.[0-9]+)|([0-9]+\.) 37 EXP [Ee][+-]?[0-9]+ 49 EXP [Ee][+-]?[0-9]+ 38 F_SUF [FfLl] 50 F_SUF [FfLl] 39 REAL ({FRAC}{EXP}?{F_SUF}?) 51 REAL ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?) 40 52 41 STRING L?\"([^\\\"]*\\.)*[^\\ 53 STRING L?\"([^\\\"]*\\.)*[^\\\"]*\" 42 CHAR L?\'([^\\\']*\\.)*[^\\ 54 CHAR L?\'([^\\\']*\\.)*[^\\\']*\' 43 55 44 MC_TOKEN ([~%^&*+=|<>/-]=)|(&&) 56 MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>) 45 57 >> 58 /* Version 2 checksumming does proper tokenization; version 1 wasn't >> 59 quite so pedantic. */ >> 60 %s V2_TOKENS >> 61 46 /* We don't do multiple input files. */ 62 /* We don't do multiple input files. */ 47 %option noyywrap 63 %option noyywrap 48 64 49 %option noinput 65 %option noinput 50 66 51 %% 67 %% 52 68 53 69 54 /* Keep track of our location in the original 70 /* Keep track of our location in the original source files. */ 55 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return 71 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return FILENAME; 56 ^#.*\n cur_li 72 ^#.*\n cur_line++; 57 \n cur_li 73 \n cur_line++; 58 74 59 /* Ignore all other whitespace. */ 75 /* Ignore all other whitespace. */ 60 [ \t\f\v\r]+ ; 76 [ \t\f\v\r]+ ; 61 77 62 78 63 {STRING} return 79 {STRING} return STRING; 64 {CHAR} return 80 {CHAR} return CHAR; 65 {IDENT} return 81 {IDENT} return IDENT; 66 82 67 /* The Pedant requires that the other C multi 83 /* The Pedant requires that the other C multi-character tokens be 68 recognized as tokens. We don't actually u 84 recognized as tokens. We don't actually use them since we don't 69 parse expressions, but we do want whitespa 85 parse expressions, but we do want whitespace to be arranged 70 around them properly. */ 86 around them properly. */ 71 {MC_TOKEN} return !! 87 <V2_TOKENS>{MC_TOKEN} return OTHER; 72 {INT} return !! 88 <V2_TOKENS>{INT} return INT; 73 {REAL} return !! 89 <V2_TOKENS>{REAL} return REAL; 74 90 75 "..." return 91 "..." return DOTS; 76 92 77 /* All other tokens are single characters. * 93 /* All other tokens are single characters. */ 78 . return 94 . return yytext[0]; 79 95 80 96 81 %% 97 %% 82 98 83 /* Bring in the keyword recognizer. */ 99 /* Bring in the keyword recognizer. */ 84 100 85 #include "keywords.c" 101 #include "keywords.c" 86 102 87 103 88 /* Macros to append to our phrase collection l 104 /* Macros to append to our phrase collection list. */ 89 105 90 /* << 91 * We mark any token, that that equals to a kn << 92 * SYM_ENUM_CONST. The parser will change this << 93 * the only problem is struct and union member << 94 * enum e { a, b }; struct s { int a, b; } << 95 * but in this case, the only effect will be, << 96 * more volatile, which is acceptable. Also, s << 97 * so far it was only observed in include/linu << 98 */ << 99 #define _APP(T,L) do { 106 #define _APP(T,L) do { \ 100 cur_node = next_node 107 cur_node = next_node; \ 101 next_node = xmalloc( 108 next_node = xmalloc(sizeof(*next_node)); \ 102 next_node->next = cu 109 next_node->next = cur_node; \ 103 cur_node->string = m 110 cur_node->string = memcpy(xmalloc(L+1), T, L+1); \ 104 cur_node->tag = !! 111 cur_node->tag = SYM_NORMAL; \ 105 find_symbol(cur_no << 106 SYM_ENUM_CONST : S << 107 cur_node->in_source_ << 108 } while (0) 112 } while (0) 109 113 110 #define APP _APP(yytext, yyleng) 114 #define APP _APP(yytext, yyleng) 111 115 112 116 113 /* The second stage lexer. Here we incorporat 117 /* The second stage lexer. Here we incorporate knowledge of the state 114 of the parser to tailor the tokens that are 118 of the parser to tailor the tokens that are returned. */ 115 119 116 int 120 int 117 yylex(void) 121 yylex(void) 118 { 122 { 119 static enum { 123 static enum { 120 ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST !! 124 ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE, 121 ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_ST !! 125 ST_EXPRESSION, ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4, >> 126 ST_TABLE_5, ST_TABLE_6 122 } lexstate = ST_NOTSTARTED; 127 } lexstate = ST_NOTSTARTED; 123 128 124 static int suppress_type_lookup, dont_want_b 129 static int suppress_type_lookup, dont_want_brace_phrase; 125 static struct string_list *next_node; 130 static struct string_list *next_node; 126 static char *source_file; << 127 131 128 int token, count = 0; 132 int token, count = 0; 129 struct string_list *cur_node; 133 struct string_list *cur_node; 130 134 131 if (lexstate == ST_NOTSTARTED) 135 if (lexstate == ST_NOTSTARTED) 132 { 136 { >> 137 BEGIN(V2_TOKENS); 133 next_node = xmalloc(sizeof(*next_node)); 138 next_node = xmalloc(sizeof(*next_node)); 134 next_node->next = NULL; 139 next_node->next = NULL; 135 lexstate = ST_NORMAL; 140 lexstate = ST_NORMAL; 136 } 141 } 137 142 138 repeat: 143 repeat: 139 token = yylex1(); 144 token = yylex1(); 140 145 141 if (token == 0) 146 if (token == 0) 142 return 0; 147 return 0; 143 else if (token == FILENAME) 148 else if (token == FILENAME) 144 { 149 { 145 char *file, *e; 150 char *file, *e; 146 151 147 /* Save the filename and line number for 152 /* Save the filename and line number for later error messages. */ 148 153 149 if (cur_filename) 154 if (cur_filename) 150 free(cur_filename); 155 free(cur_filename); 151 156 152 file = strchr(yytext, '\"')+1; 157 file = strchr(yytext, '\"')+1; 153 e = strchr(file, '\"'); 158 e = strchr(file, '\"'); 154 *e = '\0'; 159 *e = '\0'; 155 cur_filename = memcpy(xmalloc(e-file+1), 160 cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1); 156 cur_line = atoi(yytext+2); 161 cur_line = atoi(yytext+2); 157 162 158 if (!source_file) { << 159 source_file = xstrdup(cur_filename); << 160 in_source_file = 1; << 161 } else { << 162 in_source_file = (strcmp(cur_filename, << 163 } << 164 << 165 goto repeat; 163 goto repeat; 166 } 164 } 167 165 168 switch (lexstate) 166 switch (lexstate) 169 { 167 { 170 case ST_NORMAL: 168 case ST_NORMAL: 171 switch (token) 169 switch (token) 172 { 170 { 173 case IDENT: 171 case IDENT: 174 APP; 172 APP; 175 { 173 { 176 int r = is_reserved_word(yytext, y !! 174 const struct resword *r = is_reserved_word(yytext, yyleng); 177 if (r >= 0) !! 175 if (r) 178 { 176 { 179 switch (token = r) !! 177 switch (token = r->token) 180 { 178 { 181 case ATTRIBUTE_KEYW: 179 case ATTRIBUTE_KEYW: 182 lexstate = ST_ATTRIBUTE; 180 lexstate = ST_ATTRIBUTE; 183 count = 0; 181 count = 0; 184 goto repeat; 182 goto repeat; 185 case ASM_KEYW: 183 case ASM_KEYW: 186 lexstate = ST_ASM; 184 lexstate = ST_ASM; 187 count = 0; 185 count = 0; 188 goto repeat; 186 goto repeat; 189 case TYPEOF_KEYW: << 190 lexstate = ST_TYPEOF; << 191 count = 0; << 192 goto repeat; << 193 187 194 case STRUCT_KEYW: 188 case STRUCT_KEYW: 195 case UNION_KEYW: 189 case UNION_KEYW: 196 case ENUM_KEYW: << 197 dont_want_brace_phrase = 3 190 dont_want_brace_phrase = 3; >> 191 case ENUM_KEYW: 198 suppress_type_lookup = 2; 192 suppress_type_lookup = 2; 199 goto fini; 193 goto fini; 200 194 201 case EXPORT_SYMBOL_KEYW: 195 case EXPORT_SYMBOL_KEYW: 202 goto fini; 196 goto fini; 203 << 204 case STATIC_ASSERT_KEYW: << 205 lexstate = ST_STATIC_ASSER << 206 count = 0; << 207 goto repeat; << 208 } 197 } 209 } 198 } 210 if (!suppress_type_lookup) 199 if (!suppress_type_lookup) 211 { 200 { 212 if (find_symbol(yytext, SYM_TY !! 201 struct symbol *sym = find_symbol(yytext, SYM_TYPEDEF); >> 202 if (sym && sym->type == SYM_TYPEDEF) 213 token = TYPE; 203 token = TYPE; 214 } 204 } 215 } 205 } 216 break; 206 break; 217 207 218 case '[': 208 case '[': 219 APP; 209 APP; 220 lexstate = ST_BRACKET; 210 lexstate = ST_BRACKET; 221 count = 1; 211 count = 1; 222 goto repeat; 212 goto repeat; 223 213 224 case '{': 214 case '{': 225 APP; 215 APP; 226 if (dont_want_brace_phrase) 216 if (dont_want_brace_phrase) 227 break; 217 break; 228 lexstate = ST_BRACE; 218 lexstate = ST_BRACE; 229 count = 1; 219 count = 1; 230 goto repeat; 220 goto repeat; 231 221 232 case '=': case ':': 222 case '=': case ':': 233 APP; 223 APP; 234 lexstate = ST_EXPRESSION; 224 lexstate = ST_EXPRESSION; 235 break; 225 break; 236 226 >> 227 case DOTS: 237 default: 228 default: 238 APP; 229 APP; 239 break; 230 break; 240 } 231 } 241 break; 232 break; 242 233 243 case ST_ATTRIBUTE: 234 case ST_ATTRIBUTE: 244 APP; 235 APP; 245 switch (token) 236 switch (token) 246 { 237 { 247 case '(': 238 case '(': 248 ++count; 239 ++count; 249 goto repeat; 240 goto repeat; 250 case ')': 241 case ')': 251 if (--count == 0) 242 if (--count == 0) 252 { 243 { 253 lexstate = ST_NORMAL; 244 lexstate = ST_NORMAL; 254 token = ATTRIBUTE_PHRASE; 245 token = ATTRIBUTE_PHRASE; 255 break; 246 break; 256 } 247 } 257 goto repeat; 248 goto repeat; 258 default: 249 default: 259 goto repeat; 250 goto repeat; 260 } 251 } 261 break; 252 break; 262 253 263 case ST_ASM: 254 case ST_ASM: 264 APP; 255 APP; 265 switch (token) 256 switch (token) 266 { 257 { 267 case '(': 258 case '(': 268 ++count; 259 ++count; 269 goto repeat; 260 goto repeat; 270 case ')': 261 case ')': 271 if (--count == 0) 262 if (--count == 0) 272 { 263 { 273 lexstate = ST_NORMAL; 264 lexstate = ST_NORMAL; 274 token = ASM_PHRASE; 265 token = ASM_PHRASE; 275 break; 266 break; 276 } 267 } 277 goto repeat; 268 goto repeat; 278 default: 269 default: 279 goto repeat; 270 goto repeat; 280 } 271 } 281 break; 272 break; 282 273 283 case ST_TYPEOF_1: << 284 if (token == IDENT) << 285 { << 286 if (is_reserved_word(yytext, yyleng) << 287 || find_symbol(yytext, SYM_TYPED << 288 { << 289 yyless(0); << 290 unput('('); << 291 lexstate = ST_NORMAL; << 292 token = TYPEOF_KEYW; << 293 break; << 294 } << 295 _APP("(", 1); << 296 } << 297 lexstate = ST_TYPEOF; << 298 /* FALLTHRU */ << 299 << 300 case ST_TYPEOF: << 301 switch (token) << 302 { << 303 case '(': << 304 if ( ++count == 1 ) << 305 lexstate = ST_TYPEOF_1; << 306 else << 307 APP; << 308 goto repeat; << 309 case ')': << 310 APP; << 311 if (--count == 0) << 312 { << 313 lexstate = ST_NORMAL; << 314 token = TYPEOF_PHRASE; << 315 break; << 316 } << 317 goto repeat; << 318 default: << 319 APP; << 320 goto repeat; << 321 } << 322 break; << 323 << 324 case ST_BRACKET: 274 case ST_BRACKET: 325 APP; 275 APP; 326 switch (token) 276 switch (token) 327 { 277 { 328 case '[': 278 case '[': 329 ++count; 279 ++count; 330 goto repeat; 280 goto repeat; 331 case ']': 281 case ']': 332 if (--count == 0) 282 if (--count == 0) 333 { 283 { 334 lexstate = ST_NORMAL; 284 lexstate = ST_NORMAL; 335 token = BRACKET_PHRASE; 285 token = BRACKET_PHRASE; 336 break; 286 break; 337 } 287 } 338 goto repeat; 288 goto repeat; 339 default: 289 default: 340 goto repeat; 290 goto repeat; 341 } 291 } 342 break; 292 break; 343 293 344 case ST_BRACE: 294 case ST_BRACE: 345 APP; 295 APP; 346 switch (token) 296 switch (token) 347 { 297 { 348 case '{': 298 case '{': 349 ++count; 299 ++count; 350 goto repeat; 300 goto repeat; 351 case '}': 301 case '}': 352 if (--count == 0) 302 if (--count == 0) 353 { 303 { 354 lexstate = ST_NORMAL; 304 lexstate = ST_NORMAL; 355 token = BRACE_PHRASE; 305 token = BRACE_PHRASE; 356 break; 306 break; 357 } 307 } 358 goto repeat; 308 goto repeat; 359 default: 309 default: 360 goto repeat; 310 goto repeat; 361 } 311 } 362 break; 312 break; 363 313 364 case ST_EXPRESSION: 314 case ST_EXPRESSION: 365 switch (token) 315 switch (token) 366 { 316 { 367 case '(': case '[': case '{': 317 case '(': case '[': case '{': 368 ++count; 318 ++count; 369 APP; 319 APP; 370 goto repeat; 320 goto repeat; 371 case '}': !! 321 case ')': case ']': case '}': 372 /* is this the last line of an enum << 373 if (count == 0) << 374 { << 375 /* Put back the token we just re << 376 after registering the express << 377 unput(token); << 378 << 379 lexstate = ST_NORMAL; << 380 token = EXPRESSION_PHRASE; << 381 break; << 382 } << 383 /* FALLTHRU */ << 384 case ')': case ']': << 385 --count; 322 --count; 386 APP; 323 APP; 387 goto repeat; 324 goto repeat; 388 case ',': case ';': 325 case ',': case ';': 389 if (count == 0) 326 if (count == 0) 390 { 327 { 391 /* Put back the token we just re 328 /* Put back the token we just read so's we can find it again 392 after registering the express 329 after registering the expression. */ 393 unput(token); 330 unput(token); 394 331 395 lexstate = ST_NORMAL; 332 lexstate = ST_NORMAL; 396 token = EXPRESSION_PHRASE; 333 token = EXPRESSION_PHRASE; 397 break; 334 break; 398 } 335 } 399 APP; 336 APP; 400 goto repeat; 337 goto repeat; 401 default: 338 default: 402 APP; 339 APP; 403 goto repeat; 340 goto repeat; 404 } 341 } 405 break; 342 break; 406 343 407 case ST_STATIC_ASSERT: !! 344 case ST_TABLE_1: 408 APP; !! 345 goto repeat; >> 346 >> 347 case ST_TABLE_2: >> 348 if (token == IDENT && yyleng == 1 && yytext[0] == 'X') >> 349 { >> 350 token = EXPORT_SYMBOL_KEYW; >> 351 lexstate = ST_TABLE_5; >> 352 APP; >> 353 break; >> 354 } >> 355 lexstate = ST_TABLE_6; >> 356 /* FALLTHRU */ >> 357 >> 358 case ST_TABLE_6: 409 switch (token) 359 switch (token) 410 { 360 { 411 case '(': !! 361 case '{': case '[': case '(': 412 ++count; 362 ++count; 413 goto repeat; !! 363 break; 414 case ')': !! 364 case '}': case ']': case ')': 415 if (--count == 0) !! 365 --count; 416 { !! 366 break; 417 lexstate = ST_NORMAL; !! 367 case ',': 418 token = STATIC_ASSERT_PHRASE; !! 368 if (count == 0) 419 break; !! 369 lexstate = ST_TABLE_2; 420 } !! 370 break; 421 goto repeat; !! 371 }; >> 372 goto repeat; >> 373 >> 374 case ST_TABLE_3: >> 375 goto repeat; >> 376 >> 377 case ST_TABLE_4: >> 378 if (token == ';') >> 379 lexstate = ST_NORMAL; >> 380 goto repeat; >> 381 >> 382 case ST_TABLE_5: >> 383 switch (token) >> 384 { >> 385 case ',': >> 386 token = ';'; >> 387 lexstate = ST_TABLE_2; >> 388 APP; >> 389 break; 422 default: 390 default: 423 goto repeat; !! 391 APP; >> 392 break; 424 } 393 } 425 break; 394 break; 426 395 427 default: 396 default: 428 exit(1); 397 exit(1); 429 } 398 } 430 fini: 399 fini: 431 400 432 if (suppress_type_lookup > 0) 401 if (suppress_type_lookup > 0) 433 --suppress_type_lookup; 402 --suppress_type_lookup; 434 if (dont_want_brace_phrase > 0) 403 if (dont_want_brace_phrase > 0) 435 --dont_want_brace_phrase; 404 --dont_want_brace_phrase; 436 405 437 yylval = &next_node->next; 406 yylval = &next_node->next; 438 407 439 return token; 408 return token; 440 } 409 }
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.