1 /* SPDX-License-Identifier: GPL-2.0-or-later * !! 1 /* Lexical analysis for genksyms. 2 /* !! 2 Copyright 1996, 1997 Linux International. 3 * Lexical analysis for genksyms. !! 3 4 * Copyright 1996, 1997 Linux International. !! 4 New implementation contributed by Richard Henderson <rth@tamu.edu> 5 * !! 5 Based on original work by Bjorn Ekwall <bj0rn@blox.se> 6 * New implementation contributed by Richard H< !! 6 7 * Based on original work by Bjorn Ekwall <bj0r !! 7 Taken from Linux modutils 2.4.22. 8 * !! 8 9 * Taken from Linux modutils 2.4.22. !! 9 This program is free software; you can redistribute it and/or modify it 10 */ !! 10 under the terms of the GNU General Public License as published by the >> 11 Free Software Foundation; either version 2 of the License, or (at your >> 12 option) any later version. >> 13 >> 14 This program is distributed in the hope that it will be useful, but >> 15 WITHOUT ANY WARRANTY; without even the implied warranty of >> 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> 17 General Public License for more details. >> 18 >> 19 You should have received a copy of the GNU General Public License >> 20 along with this program; if not, write to the Free Software Foundation, >> 21 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ >> 22 11 23 12 %{ 24 %{ 13 25 14 #include <limits.h> 26 #include <limits.h> 15 #include <stdlib.h> 27 #include <stdlib.h> 16 #include <string.h> 28 #include <string.h> 17 #include <ctype.h> 29 #include <ctype.h> 18 30 19 #include "genksyms.h" 31 #include "genksyms.h" 20 #include "parse.tab.h" 32 #include "parse.tab.h" 21 33 22 /* We've got a two-level lexer here. We let f 34 /* We've got a two-level lexer here. We let flex do basic tokenization 23 and then we categorize those basic tokens i 35 and then we categorize those basic tokens in the second stage. */ 24 #define YY_DECL static int yylex1(void 36 #define YY_DECL static int yylex1(void) 25 37 26 %} 38 %} 27 39 28 IDENT [A-Za-z_\$][A-Za-z0-9_ 40 IDENT [A-Za-z_\$][A-Za-z0-9_\$]* 29 41 30 O_INT 0[0-7]* 42 O_INT 0[0-7]* 31 D_INT [1-9][0-9]* 43 D_INT [1-9][0-9]* 32 X_INT 0[Xx][0-9A-Fa-f]+ 44 X_INT 0[Xx][0-9A-Fa-f]+ 33 I_SUF [Uu]|[Ll]|[Uu][Ll]|[Ll 45 I_SUF [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu] 34 INT ({O_INT}|{D_INT}|{X_IN 46 INT ({O_INT}|{D_INT}|{X_INT}){I_SUF}? 35 47 36 FRAC ([0-9]*\.[0-9]+)|([0-9 48 FRAC ([0-9]*\.[0-9]+)|([0-9]+\.) 37 EXP [Ee][+-]?[0-9]+ 49 EXP [Ee][+-]?[0-9]+ 38 F_SUF [FfLl] 50 F_SUF [FfLl] 39 REAL ({FRAC}{EXP}?{F_SUF}?) 51 REAL ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?) 40 52 41 STRING L?\"([^\\\"]*\\.)*[^\\ 53 STRING L?\"([^\\\"]*\\.)*[^\\\"]*\" 42 CHAR L?\'([^\\\']*\\.)*[^\\ 54 CHAR L?\'([^\\\']*\\.)*[^\\\']*\' 43 55 44 MC_TOKEN ([~%^&*+=|<>/-]=)|(&&) 56 MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>) 45 57 46 /* We don't do multiple input files. */ 58 /* We don't do multiple input files. */ 47 %option noyywrap 59 %option noyywrap 48 60 49 %option noinput 61 %option noinput 50 62 51 %% 63 %% 52 64 53 65 54 /* Keep track of our location in the original 66 /* Keep track of our location in the original source files. */ 55 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return 67 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return FILENAME; 56 ^#.*\n cur_li 68 ^#.*\n cur_line++; 57 \n cur_li 69 \n cur_line++; 58 70 59 /* Ignore all other whitespace. */ 71 /* Ignore all other whitespace. */ 60 [ \t\f\v\r]+ ; 72 [ \t\f\v\r]+ ; 61 73 62 74 63 {STRING} return 75 {STRING} return STRING; 64 {CHAR} return 76 {CHAR} return CHAR; 65 {IDENT} return 77 {IDENT} return IDENT; 66 78 67 /* The Pedant requires that the other C multi 79 /* The Pedant requires that the other C multi-character tokens be 68 recognized as tokens. We don't actually u 80 recognized as tokens. We don't actually use them since we don't 69 parse expressions, but we do want whitespa 81 parse expressions, but we do want whitespace to be arranged 70 around them properly. */ 82 around them properly. */ 71 {MC_TOKEN} return 83 {MC_TOKEN} return OTHER; 72 {INT} return 84 {INT} return INT; 73 {REAL} return 85 {REAL} return REAL; 74 86 75 "..." return 87 "..." return DOTS; 76 88 77 /* All other tokens are single characters. * 89 /* All other tokens are single characters. */ 78 . return 90 . return yytext[0]; 79 91 80 92 81 %% 93 %% 82 94 83 /* Bring in the keyword recognizer. */ 95 /* Bring in the keyword recognizer. */ 84 96 85 #include "keywords.c" !! 97 #include "keywords.hash.c" 86 98 87 99 88 /* Macros to append to our phrase collection l 100 /* Macros to append to our phrase collection list. */ 89 101 90 /* 102 /* 91 * We mark any token, that that equals to a kn 103 * We mark any token, that that equals to a known enumerator, as 92 * SYM_ENUM_CONST. The parser will change this 104 * SYM_ENUM_CONST. The parser will change this for struct and union tags later, 93 * the only problem is struct and union member 105 * the only problem is struct and union members: 94 * enum e { a, b }; struct s { int a, b; } 106 * enum e { a, b }; struct s { int a, b; } 95 * but in this case, the only effect will be, 107 * but in this case, the only effect will be, that the ABI checksums become 96 * more volatile, which is acceptable. Also, s 108 * more volatile, which is acceptable. Also, such collisions are quite rare, 97 * so far it was only observed in include/linu 109 * so far it was only observed in include/linux/telephony.h. 98 */ 110 */ 99 #define _APP(T,L) do { 111 #define _APP(T,L) do { \ 100 cur_node = next_node 112 cur_node = next_node; \ 101 next_node = xmalloc( 113 next_node = xmalloc(sizeof(*next_node)); \ 102 next_node->next = cu 114 next_node->next = cur_node; \ 103 cur_node->string = m 115 cur_node->string = memcpy(xmalloc(L+1), T, L+1); \ 104 cur_node->tag = 116 cur_node->tag = \ 105 find_symbol(cur_no 117 find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\ 106 SYM_ENUM_CONST : S 118 SYM_ENUM_CONST : SYM_NORMAL ; \ 107 cur_node->in_source_ 119 cur_node->in_source_file = in_source_file; \ 108 } while (0) 120 } while (0) 109 121 110 #define APP _APP(yytext, yyleng) 122 #define APP _APP(yytext, yyleng) 111 123 112 124 113 /* The second stage lexer. Here we incorporat 125 /* The second stage lexer. Here we incorporate knowledge of the state 114 of the parser to tailor the tokens that are 126 of the parser to tailor the tokens that are returned. */ 115 127 116 int 128 int 117 yylex(void) 129 yylex(void) 118 { 130 { 119 static enum { 131 static enum { 120 ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST !! 132 ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE, 121 ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_ST !! 133 ST_EXPRESSION, ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4, >> 134 ST_TABLE_5, ST_TABLE_6 122 } lexstate = ST_NOTSTARTED; 135 } lexstate = ST_NOTSTARTED; 123 136 124 static int suppress_type_lookup, dont_want_b 137 static int suppress_type_lookup, dont_want_brace_phrase; 125 static struct string_list *next_node; 138 static struct string_list *next_node; 126 static char *source_file; << 127 139 128 int token, count = 0; 140 int token, count = 0; 129 struct string_list *cur_node; 141 struct string_list *cur_node; 130 142 131 if (lexstate == ST_NOTSTARTED) 143 if (lexstate == ST_NOTSTARTED) 132 { 144 { 133 next_node = xmalloc(sizeof(*next_node)); 145 next_node = xmalloc(sizeof(*next_node)); 134 next_node->next = NULL; 146 next_node->next = NULL; 135 lexstate = ST_NORMAL; 147 lexstate = ST_NORMAL; 136 } 148 } 137 149 138 repeat: 150 repeat: 139 token = yylex1(); 151 token = yylex1(); 140 152 141 if (token == 0) 153 if (token == 0) 142 return 0; 154 return 0; 143 else if (token == FILENAME) 155 else if (token == FILENAME) 144 { 156 { 145 char *file, *e; 157 char *file, *e; 146 158 147 /* Save the filename and line number for 159 /* Save the filename and line number for later error messages. */ 148 160 149 if (cur_filename) 161 if (cur_filename) 150 free(cur_filename); 162 free(cur_filename); 151 163 152 file = strchr(yytext, '\"')+1; 164 file = strchr(yytext, '\"')+1; 153 e = strchr(file, '\"'); 165 e = strchr(file, '\"'); 154 *e = '\0'; 166 *e = '\0'; 155 cur_filename = memcpy(xmalloc(e-file+1), 167 cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1); 156 cur_line = atoi(yytext+2); 168 cur_line = atoi(yytext+2); 157 169 158 if (!source_file) { 170 if (!source_file) { 159 source_file = xstrdup(cur_filename); 171 source_file = xstrdup(cur_filename); 160 in_source_file = 1; 172 in_source_file = 1; 161 } else { 173 } else { 162 in_source_file = (strcmp(cur_filename, 174 in_source_file = (strcmp(cur_filename, source_file) == 0); 163 } 175 } 164 176 165 goto repeat; 177 goto repeat; 166 } 178 } 167 179 168 switch (lexstate) 180 switch (lexstate) 169 { 181 { 170 case ST_NORMAL: 182 case ST_NORMAL: 171 switch (token) 183 switch (token) 172 { 184 { 173 case IDENT: 185 case IDENT: 174 APP; 186 APP; 175 { 187 { 176 int r = is_reserved_word(yytext, y !! 188 const struct resword *r = is_reserved_word(yytext, yyleng); 177 if (r >= 0) !! 189 if (r) 178 { 190 { 179 switch (token = r) !! 191 switch (token = r->token) 180 { 192 { 181 case ATTRIBUTE_KEYW: 193 case ATTRIBUTE_KEYW: 182 lexstate = ST_ATTRIBUTE; 194 lexstate = ST_ATTRIBUTE; 183 count = 0; 195 count = 0; 184 goto repeat; 196 goto repeat; 185 case ASM_KEYW: 197 case ASM_KEYW: 186 lexstate = ST_ASM; 198 lexstate = ST_ASM; 187 count = 0; 199 count = 0; 188 goto repeat; 200 goto repeat; 189 case TYPEOF_KEYW: << 190 lexstate = ST_TYPEOF; << 191 count = 0; << 192 goto repeat; << 193 201 194 case STRUCT_KEYW: 202 case STRUCT_KEYW: 195 case UNION_KEYW: 203 case UNION_KEYW: 196 case ENUM_KEYW: 204 case ENUM_KEYW: 197 dont_want_brace_phrase = 3 205 dont_want_brace_phrase = 3; 198 suppress_type_lookup = 2; 206 suppress_type_lookup = 2; 199 goto fini; 207 goto fini; 200 208 201 case EXPORT_SYMBOL_KEYW: 209 case EXPORT_SYMBOL_KEYW: 202 goto fini; 210 goto fini; 203 << 204 case STATIC_ASSERT_KEYW: << 205 lexstate = ST_STATIC_ASSER << 206 count = 0; << 207 goto repeat; << 208 } 211 } 209 } 212 } 210 if (!suppress_type_lookup) 213 if (!suppress_type_lookup) 211 { 214 { 212 if (find_symbol(yytext, SYM_TY 215 if (find_symbol(yytext, SYM_TYPEDEF, 1)) 213 token = TYPE; 216 token = TYPE; 214 } 217 } 215 } 218 } 216 break; 219 break; 217 220 218 case '[': 221 case '[': 219 APP; 222 APP; 220 lexstate = ST_BRACKET; 223 lexstate = ST_BRACKET; 221 count = 1; 224 count = 1; 222 goto repeat; 225 goto repeat; 223 226 224 case '{': 227 case '{': 225 APP; 228 APP; 226 if (dont_want_brace_phrase) 229 if (dont_want_brace_phrase) 227 break; 230 break; 228 lexstate = ST_BRACE; 231 lexstate = ST_BRACE; 229 count = 1; 232 count = 1; 230 goto repeat; 233 goto repeat; 231 234 232 case '=': case ':': 235 case '=': case ':': 233 APP; 236 APP; 234 lexstate = ST_EXPRESSION; 237 lexstate = ST_EXPRESSION; 235 break; 238 break; 236 239 >> 240 case DOTS: 237 default: 241 default: 238 APP; 242 APP; 239 break; 243 break; 240 } 244 } 241 break; 245 break; 242 246 243 case ST_ATTRIBUTE: 247 case ST_ATTRIBUTE: 244 APP; 248 APP; 245 switch (token) 249 switch (token) 246 { 250 { 247 case '(': 251 case '(': 248 ++count; 252 ++count; 249 goto repeat; 253 goto repeat; 250 case ')': 254 case ')': 251 if (--count == 0) 255 if (--count == 0) 252 { 256 { 253 lexstate = ST_NORMAL; 257 lexstate = ST_NORMAL; 254 token = ATTRIBUTE_PHRASE; 258 token = ATTRIBUTE_PHRASE; 255 break; 259 break; 256 } 260 } 257 goto repeat; 261 goto repeat; 258 default: 262 default: 259 goto repeat; 263 goto repeat; 260 } 264 } 261 break; 265 break; 262 266 263 case ST_ASM: 267 case ST_ASM: 264 APP; 268 APP; 265 switch (token) 269 switch (token) 266 { 270 { 267 case '(': 271 case '(': 268 ++count; 272 ++count; 269 goto repeat; 273 goto repeat; 270 case ')': 274 case ')': 271 if (--count == 0) 275 if (--count == 0) 272 { 276 { 273 lexstate = ST_NORMAL; 277 lexstate = ST_NORMAL; 274 token = ASM_PHRASE; 278 token = ASM_PHRASE; 275 break; 279 break; 276 } 280 } 277 goto repeat; 281 goto repeat; 278 default: 282 default: 279 goto repeat; 283 goto repeat; 280 } 284 } 281 break; 285 break; 282 286 283 case ST_TYPEOF_1: << 284 if (token == IDENT) << 285 { << 286 if (is_reserved_word(yytext, yyleng) << 287 || find_symbol(yytext, SYM_TYPED << 288 { << 289 yyless(0); << 290 unput('('); << 291 lexstate = ST_NORMAL; << 292 token = TYPEOF_KEYW; << 293 break; << 294 } << 295 _APP("(", 1); << 296 } << 297 lexstate = ST_TYPEOF; << 298 /* FALLTHRU */ << 299 << 300 case ST_TYPEOF: << 301 switch (token) << 302 { << 303 case '(': << 304 if ( ++count == 1 ) << 305 lexstate = ST_TYPEOF_1; << 306 else << 307 APP; << 308 goto repeat; << 309 case ')': << 310 APP; << 311 if (--count == 0) << 312 { << 313 lexstate = ST_NORMAL; << 314 token = TYPEOF_PHRASE; << 315 break; << 316 } << 317 goto repeat; << 318 default: << 319 APP; << 320 goto repeat; << 321 } << 322 break; << 323 << 324 case ST_BRACKET: 287 case ST_BRACKET: 325 APP; 288 APP; 326 switch (token) 289 switch (token) 327 { 290 { 328 case '[': 291 case '[': 329 ++count; 292 ++count; 330 goto repeat; 293 goto repeat; 331 case ']': 294 case ']': 332 if (--count == 0) 295 if (--count == 0) 333 { 296 { 334 lexstate = ST_NORMAL; 297 lexstate = ST_NORMAL; 335 token = BRACKET_PHRASE; 298 token = BRACKET_PHRASE; 336 break; 299 break; 337 } 300 } 338 goto repeat; 301 goto repeat; 339 default: 302 default: 340 goto repeat; 303 goto repeat; 341 } 304 } 342 break; 305 break; 343 306 344 case ST_BRACE: 307 case ST_BRACE: 345 APP; 308 APP; 346 switch (token) 309 switch (token) 347 { 310 { 348 case '{': 311 case '{': 349 ++count; 312 ++count; 350 goto repeat; 313 goto repeat; 351 case '}': 314 case '}': 352 if (--count == 0) 315 if (--count == 0) 353 { 316 { 354 lexstate = ST_NORMAL; 317 lexstate = ST_NORMAL; 355 token = BRACE_PHRASE; 318 token = BRACE_PHRASE; 356 break; 319 break; 357 } 320 } 358 goto repeat; 321 goto repeat; 359 default: 322 default: 360 goto repeat; 323 goto repeat; 361 } 324 } 362 break; 325 break; 363 326 364 case ST_EXPRESSION: 327 case ST_EXPRESSION: 365 switch (token) 328 switch (token) 366 { 329 { 367 case '(': case '[': case '{': 330 case '(': case '[': case '{': 368 ++count; 331 ++count; 369 APP; 332 APP; 370 goto repeat; 333 goto repeat; 371 case '}': 334 case '}': 372 /* is this the last line of an enum 335 /* is this the last line of an enum declaration? */ 373 if (count == 0) 336 if (count == 0) 374 { 337 { 375 /* Put back the token we just re 338 /* Put back the token we just read so's we can find it again 376 after registering the express 339 after registering the expression. */ 377 unput(token); 340 unput(token); 378 341 379 lexstate = ST_NORMAL; 342 lexstate = ST_NORMAL; 380 token = EXPRESSION_PHRASE; 343 token = EXPRESSION_PHRASE; 381 break; 344 break; 382 } 345 } 383 /* FALLTHRU */ 346 /* FALLTHRU */ 384 case ')': case ']': 347 case ')': case ']': 385 --count; 348 --count; 386 APP; 349 APP; 387 goto repeat; 350 goto repeat; 388 case ',': case ';': 351 case ',': case ';': 389 if (count == 0) 352 if (count == 0) 390 { 353 { 391 /* Put back the token we just re 354 /* Put back the token we just read so's we can find it again 392 after registering the express 355 after registering the expression. */ 393 unput(token); 356 unput(token); 394 357 395 lexstate = ST_NORMAL; 358 lexstate = ST_NORMAL; 396 token = EXPRESSION_PHRASE; 359 token = EXPRESSION_PHRASE; 397 break; 360 break; 398 } 361 } 399 APP; 362 APP; 400 goto repeat; 363 goto repeat; 401 default: 364 default: 402 APP; 365 APP; 403 goto repeat; 366 goto repeat; 404 } 367 } 405 break; 368 break; 406 369 407 case ST_STATIC_ASSERT: !! 370 case ST_TABLE_1: 408 APP; !! 371 goto repeat; >> 372 >> 373 case ST_TABLE_2: >> 374 if (token == IDENT && yyleng == 1 && yytext[0] == 'X') >> 375 { >> 376 token = EXPORT_SYMBOL_KEYW; >> 377 lexstate = ST_TABLE_5; >> 378 APP; >> 379 break; >> 380 } >> 381 lexstate = ST_TABLE_6; >> 382 /* FALLTHRU */ >> 383 >> 384 case ST_TABLE_6: 409 switch (token) 385 switch (token) 410 { 386 { 411 case '(': !! 387 case '{': case '[': case '(': 412 ++count; 388 ++count; 413 goto repeat; !! 389 break; 414 case ')': !! 390 case '}': case ']': case ')': 415 if (--count == 0) !! 391 --count; 416 { !! 392 break; 417 lexstate = ST_NORMAL; !! 393 case ',': 418 token = STATIC_ASSERT_PHRASE; !! 394 if (count == 0) 419 break; !! 395 lexstate = ST_TABLE_2; 420 } !! 396 break; 421 goto repeat; !! 397 }; >> 398 goto repeat; >> 399 >> 400 case ST_TABLE_3: >> 401 goto repeat; >> 402 >> 403 case ST_TABLE_4: >> 404 if (token == ';') >> 405 lexstate = ST_NORMAL; >> 406 goto repeat; >> 407 >> 408 case ST_TABLE_5: >> 409 switch (token) >> 410 { >> 411 case ',': >> 412 token = ';'; >> 413 lexstate = ST_TABLE_2; >> 414 APP; >> 415 break; 422 default: 416 default: 423 goto repeat; !! 417 APP; >> 418 break; 424 } 419 } 425 break; 420 break; 426 421 427 default: 422 default: 428 exit(1); 423 exit(1); 429 } 424 } 430 fini: 425 fini: 431 426 432 if (suppress_type_lookup > 0) 427 if (suppress_type_lookup > 0) 433 --suppress_type_lookup; 428 --suppress_type_lookup; 434 if (dont_want_brace_phrase > 0) 429 if (dont_want_brace_phrase > 0) 435 --dont_want_brace_phrase; 430 --dont_want_brace_phrase; 436 431 437 yylval = &next_node->next; 432 yylval = &next_node->next; 438 433 439 return token; 434 return token; 440 } 435 }
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.