1 #include <linux/module.h> 1 #include <linux/module.h> 2 #include <linux/glob.h> 2 #include <linux/glob.h> 3 3 4 /* 4 /* 5 * The only reason this code can be compiled a 5 * The only reason this code can be compiled as a module is because the 6 * ATA code that depends on it can be as well. 6 * ATA code that depends on it can be as well. In practice, they're 7 * both usually compiled in and the module ove 7 * both usually compiled in and the module overhead goes away. 8 */ 8 */ 9 MODULE_DESCRIPTION("glob(7) matching"); 9 MODULE_DESCRIPTION("glob(7) matching"); 10 MODULE_LICENSE("Dual MIT/GPL"); 10 MODULE_LICENSE("Dual MIT/GPL"); 11 11 12 /** 12 /** 13 * glob_match - Shell-style pattern matching, 13 * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0) 14 * @pat: Shell-style pattern to match, e.g. "* 14 * @pat: Shell-style pattern to match, e.g. "*.[ch]". 15 * @str: String to match. The pattern must ma 15 * @str: String to match. The pattern must match the entire string. 16 * 16 * 17 * Perform shell-style glob matching, returnin 17 * Perform shell-style glob matching, returning true (1) if the match 18 * succeeds, or false (0) if it fails. Equiva 18 * succeeds, or false (0) if it fails. Equivalent to !fnmatch(@pat, @str, 0). 19 * 19 * 20 * Pattern metacharacters are ?, *, [ and \. 20 * Pattern metacharacters are ?, *, [ and \. 21 * (And, inside character classes, !, - and ]. 21 * (And, inside character classes, !, - and ].) 22 * 22 * 23 * This is small and simple implementation int 23 * This is small and simple implementation intended for device blacklists 24 * where a string is matched against a number 24 * where a string is matched against a number of patterns. Thus, it 25 * does not preprocess the patterns. It is no 25 * does not preprocess the patterns. It is non-recursive, and run-time 26 * is at most quadratic: strlen(@str)*strlen(@ 26 * is at most quadratic: strlen(@str)*strlen(@pat). 27 * 27 * 28 * An example of the worst case is glob_match( 28 * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa"); 29 * it takes 6 passes over the pattern before m 29 * it takes 6 passes over the pattern before matching the string. 30 * 30 * 31 * Like !fnmatch(@pat, @str, 0) and unlike the 31 * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT 32 * treat / or leading . specially; it isn't ac 32 * treat / or leading . specially; it isn't actually used for pathnames. 33 * 33 * 34 * Note that according to glob(7) (and unlike 34 * Note that according to glob(7) (and unlike bash), character classes 35 * are complemented by a leading !; this does 35 * are complemented by a leading !; this does not support the regex-style 36 * [^a-z] syntax. 36 * [^a-z] syntax. 37 * 37 * 38 * An opening bracket without a matching close 38 * An opening bracket without a matching close is matched literally. 39 */ 39 */ 40 bool __pure glob_match(char const *pat, char c 40 bool __pure glob_match(char const *pat, char const *str) 41 { 41 { 42 /* 42 /* 43 * Backtrack to previous * on mismatch 43 * Backtrack to previous * on mismatch and retry starting one 44 * character later in the string. Bec 44 * character later in the string. Because * matches all characters 45 * (no exception for /), it can be eas 45 * (no exception for /), it can be easily proved that there's 46 * never a need to backtrack multiple 46 * never a need to backtrack multiple levels. 47 */ 47 */ 48 char const *back_pat = NULL, *back_str !! 48 char const *back_pat = NULL, *back_str = back_str; 49 49 50 /* 50 /* 51 * Loop over each token (character or 51 * Loop over each token (character or class) in pat, matching 52 * it against the remaining unmatched 52 * it against the remaining unmatched tail of str. Return false 53 * on mismatch, or true after matching 53 * on mismatch, or true after matching the trailing nul bytes. 54 */ 54 */ 55 for (;;) { 55 for (;;) { 56 unsigned char c = *str++; 56 unsigned char c = *str++; 57 unsigned char d = *pat++; 57 unsigned char d = *pat++; 58 58 59 switch (d) { 59 switch (d) { 60 case '?': /* Wildcard: a 60 case '?': /* Wildcard: anything but nul */ 61 if (c == '\0') 61 if (c == '\0') 62 return false; 62 return false; 63 break; 63 break; 64 case '*': /* Any-length 64 case '*': /* Any-length wildcard */ 65 if (*pat == '\0') 65 if (*pat == '\0') /* Optimize trailing * case */ 66 return true; 66 return true; 67 back_pat = pat; 67 back_pat = pat; 68 back_str = --str; 68 back_str = --str; /* Allow zero-length match */ 69 break; 69 break; 70 case '[': { /* Character c 70 case '[': { /* Character class */ 71 bool match = false, in 71 bool match = false, inverted = (*pat == '!'); 72 char const *class = pa 72 char const *class = pat + inverted; 73 unsigned char a = *cla 73 unsigned char a = *class++; 74 74 75 /* 75 /* 76 * Iterate over each s 76 * Iterate over each span in the character class. 77 * A span is either a 77 * A span is either a single character a, or a 78 * range a-b. The fir 78 * range a-b. The first span may begin with ']'. 79 */ 79 */ 80 do { 80 do { 81 unsigned char 81 unsigned char b = a; 82 82 83 if (a == '\0') 83 if (a == '\0') /* Malformed */ 84 goto l 84 goto literal; 85 85 86 if (class[0] = 86 if (class[0] == '-' && class[1] != ']') { 87 b = cl 87 b = class[1]; 88 88 89 if (b 89 if (b == '\0') 90 90 goto literal; 91 91 92 class 92 class += 2; 93 /* Any 93 /* Any special action if a > b? */ 94 } 94 } 95 match |= (a <= 95 match |= (a <= c && c <= b); 96 } while ((a = *class++ 96 } while ((a = *class++) != ']'); 97 97 98 if (match == inverted) 98 if (match == inverted) 99 goto backtrack 99 goto backtrack; 100 pat = class; 100 pat = class; 101 } 101 } 102 break; 102 break; 103 case '\\': 103 case '\\': 104 d = *pat++; 104 d = *pat++; 105 fallthrough; !! 105 /*FALLTHROUGH*/ 106 default: /* Literal cha 106 default: /* Literal character */ 107 literal: 107 literal: 108 if (c == d) { 108 if (c == d) { 109 if (d == '\0') 109 if (d == '\0') 110 return 110 return true; 111 break; 111 break; 112 } 112 } 113 backtrack: 113 backtrack: 114 if (c == '\0' || !back 114 if (c == '\0' || !back_pat) 115 return false; 115 return false; /* No point continuing */ 116 /* Try again from last 116 /* Try again from last *, one character later in str. */ 117 pat = back_pat; 117 pat = back_pat; 118 str = ++back_str; 118 str = ++back_str; 119 break; 119 break; 120 } 120 } 121 } 121 } 122 } 122 } 123 EXPORT_SYMBOL(glob_match); 123 EXPORT_SYMBOL(glob_match); >> 124 >> 125 >> 126 #ifdef CONFIG_GLOB_SELFTEST >> 127 >> 128 #include <linux/printk.h> >> 129 #include <linux/moduleparam.h> >> 130 >> 131 /* Boot with "glob.verbose=1" to show successful tests, too */ >> 132 static bool verbose = false; >> 133 module_param(verbose, bool, 0); >> 134 >> 135 struct glob_test { >> 136 char const *pat, *str; >> 137 bool expected; >> 138 }; >> 139 >> 140 static bool __pure __init test(char const *pat, char const *str, bool expected) >> 141 { >> 142 bool match = glob_match(pat, str); >> 143 bool success = match == expected; >> 144 >> 145 /* Can't get string literals into a particular section, so... */ >> 146 static char const msg_error[] __initconst = >> 147 KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n"; >> 148 static char const msg_ok[] __initconst = >> 149 KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n"; >> 150 static char const mismatch[] __initconst = "mismatch"; >> 151 char const *message; >> 152 >> 153 if (!success) >> 154 message = msg_error; >> 155 else if (verbose) >> 156 message = msg_ok; >> 157 else >> 158 return success; >> 159 >> 160 printk(message, pat, str, mismatch + 3*match); >> 161 return success; >> 162 } >> 163 >> 164 /* >> 165 * The tests are all jammed together in one array to make it simpler >> 166 * to place that array in the .init.rodata section. The obvious >> 167 * "array of structures containing char *" has no way to force the >> 168 * pointed-to strings to be in a particular section. >> 169 * >> 170 * Anyway, a test consists of: >> 171 * 1. Expected glob_match result: '1' or ''. >> 172 * 2. Pattern to match: null-terminated string >> 173 * 3. String to match against: null-terminated string >> 174 * >> 175 * The list of tests is terminated with a final '\0' instead of >> 176 * a glob_match result character. >> 177 */ >> 178 static char const glob_tests[] __initconst = >> 179 /* Some basic tests */ >> 180 "1" "a\0" "a\0" >> 181 "" "a\0" "b\0" >> 182 "" "a\0" "aa\0" >> 183 "" "a\0" "\0" >> 184 "1" "\0" "\0" >> 185 "" "\0" "a\0" >> 186 /* Simple character class tests */ >> 187 "1" "[a]\0" "a\0" >> 188 "" "[a]\0" "b\0" >> 189 "" "[!a]\0" "a\0" >> 190 "1" "[!a]\0" "b\0" >> 191 "1" "[ab]\0" "a\0" >> 192 "1" "[ab]\0" "b\0" >> 193 "" "[ab]\0" "c\0" >> 194 "1" "[!ab]\0" "c\0" >> 195 "1" "[a-c]\0" "b\0" >> 196 "" "[a-c]\0" "d\0" >> 197 /* Corner cases in character class parsing */ >> 198 "1" "[a-c-e-g]\0" "-\0" >> 199 "" "[a-c-e-g]\0" "d\0" >> 200 "1" "[a-c-e-g]\0" "f\0" >> 201 "1" "[]a-ceg-ik[]\0" "a\0" >> 202 "1" "[]a-ceg-ik[]\0" "]\0" >> 203 "1" "[]a-ceg-ik[]\0" "[\0" >> 204 "1" "[]a-ceg-ik[]\0" "h\0" >> 205 "" "[]a-ceg-ik[]\0" "f\0" >> 206 "" "[!]a-ceg-ik[]\0" "h\0" >> 207 "" "[!]a-ceg-ik[]\0" "]\0" >> 208 "1" "[!]a-ceg-ik[]\0" "f\0" >> 209 /* Simple wild cards */ >> 210 "1" "?\0" "a\0" >> 211 "" "?\0" "aa\0" >> 212 "" "??\0" "a\0" >> 213 "1" "?x?\0" "axb\0" >> 214 "" "?x?\0" "abx\0" >> 215 "" "?x?\0" "xab\0" >> 216 /* Asterisk wild cards (backtracking) */ >> 217 "" "*??\0" "a\0" >> 218 "1" "*??\0" "ab\0" >> 219 "1" "*??\0" "abc\0" >> 220 "1" "*??\0" "abcd\0" >> 221 "" "??*\0" "a\0" >> 222 "1" "??*\0" "ab\0" >> 223 "1" "??*\0" "abc\0" >> 224 "1" "??*\0" "abcd\0" >> 225 "" "?*?\0" "a\0" >> 226 "1" "?*?\0" "ab\0" >> 227 "1" "?*?\0" "abc\0" >> 228 "1" "?*?\0" "abcd\0" >> 229 "1" "*b\0" "b\0" >> 230 "1" "*b\0" "ab\0" >> 231 "" "*b\0" "ba\0" >> 232 "1" "*b\0" "bb\0" >> 233 "1" "*b\0" "abb\0" >> 234 "1" "*b\0" "bab\0" >> 235 "1" "*bc\0" "abbc\0" >> 236 "1" "*bc\0" "bc\0" >> 237 "1" "*bc\0" "bbc\0" >> 238 "1" "*bc\0" "bcbc\0" >> 239 /* Multiple asterisks (complex backtracking) */ >> 240 "1" "*ac*\0" "abacadaeafag\0" >> 241 "1" "*ac*ae*ag*\0" "abacadaeafag\0" >> 242 "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0" >> 243 "" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0" >> 244 "1" "*abcd*\0" "abcabcabcabcdefg\0" >> 245 "1" "*ab*cd*\0" "abcabcabcabcdefg\0" >> 246 "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0" >> 247 "" "*abcd*\0" "abcabcabcabcefg\0" >> 248 "" "*ab*cd*\0" "abcabcabcabcefg\0"; >> 249 >> 250 static int __init glob_init(void) >> 251 { >> 252 unsigned successes = 0; >> 253 unsigned n = 0; >> 254 char const *p = glob_tests; >> 255 static char const message[] __initconst = >> 256 KERN_INFO "glob: %u self-tests passed, %u failed\n"; >> 257 >> 258 /* >> 259 * Tests are jammed together in a string. The first byte is '1' >> 260 * or '' to indicate the expected outcome, or '\0' to indicate the >> 261 * end of the tests. Then come two null-terminated strings: the >> 262 * pattern and the string to match it against. >> 263 */ >> 264 while (*p) { >> 265 bool expected = *p++ & 1; >> 266 char const *pat = p; >> 267 >> 268 p += strlen(p) + 1; >> 269 successes += test(pat, p, expected); >> 270 p += strlen(p) + 1; >> 271 n++; >> 272 } >> 273 >> 274 n -= successes; >> 275 printk(message, successes, n); >> 276 >> 277 /* What's the errno for "kernel bug detected"? Guess... */ >> 278 return n ? -ECANCELED : 0; >> 279 } >> 280 >> 281 /* We need a dummy exit function to allow unload */ >> 282 static void __exit glob_fini(void) { } >> 283 >> 284 module_init(glob_init); >> 285 module_exit(glob_fini); >> 286 >> 287 #endif /* CONFIG_GLOB_SELFTEST */ 124 288
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.