1 // SPDX-License-Identifier: GPL-2.0-or-later << 2 /* 1 /* 3 * lib/ts_kmp.c Knuth-Morris-Pratt tex 2 * lib/ts_kmp.c Knuth-Morris-Pratt text search implementation 4 * 3 * >> 4 * This program is free software; you can redistribute it and/or >> 5 * modify it under the terms of the GNU General Public License >> 6 * as published by the Free Software Foundation; either version >> 7 * 2 of the License, or (at your option) any later version. >> 8 * 5 * Authors: Thomas Graf <tgraf@suug.ch> 9 * Authors: Thomas Graf <tgraf@suug.ch> 6 * 10 * 7 * =========================================== 11 * ========================================================================== 8 * 12 * 9 * Implements a linear-time string-matching 13 * Implements a linear-time string-matching algorithm due to Knuth, 10 * Morris, and Pratt [1]. Their algorithm av 14 * Morris, and Pratt [1]. Their algorithm avoids the explicit 11 * computation of the transition function DE 15 * computation of the transition function DELTA altogether. Its 12 * matching time is O(n), for n being length 16 * matching time is O(n), for n being length(text), using just an 13 * auxiliary function PI[1..m], for m being 17 * auxiliary function PI[1..m], for m being length(pattern), 14 * precomputed from the pattern in time O(m) 18 * precomputed from the pattern in time O(m). The array PI allows 15 * the transition function DELTA to be compu 19 * the transition function DELTA to be computed efficiently 16 * "on the fly" as needed. Roughly speaking, 20 * "on the fly" as needed. Roughly speaking, for any state 17 * "q" = 0,1,...,m and any character "a" in 21 * "q" = 0,1,...,m and any character "a" in SIGMA, the value 18 * PI["q"] contains the information that is 22 * PI["q"] contains the information that is independent of "a" and 19 * is needed to compute DELTA("q", "a") [2]. 23 * is needed to compute DELTA("q", "a") [2]. 
Since the array PI 20 * has only m entries, whereas DELTA has O(m 24 * has only m entries, whereas DELTA has O(m|SIGMA|) entries, we 21 * save a factor of |SIGMA| in the preproces 25 * save a factor of |SIGMA| in the preprocessing time by computing 22 * PI rather than DELTA. 26 * PI rather than DELTA. 23 * 27 * 24 * [1] Cormen, Leiserson, Rivest, Stein 28 * [1] Cormen, Leiserson, Rivest, Stein 25 * Introdcution to Algorithms, 2nd Editi 29 * Introdcution to Algorithms, 2nd Edition, MIT Press 26 * [2] See finite automaton theory 30 * [2] See finite automaton theory 27 */ 31 */ 28 32 29 #include <linux/module.h> 33 #include <linux/module.h> 30 #include <linux/types.h> 34 #include <linux/types.h> 31 #include <linux/string.h> 35 #include <linux/string.h> 32 #include <linux/ctype.h> 36 #include <linux/ctype.h> 33 #include <linux/textsearch.h> 37 #include <linux/textsearch.h> 34 38 35 struct ts_kmp 39 struct ts_kmp 36 { 40 { 37 u8 * pattern; 41 u8 * pattern; 38 unsigned int pattern_len; 42 unsigned int pattern_len; 39 unsigned int prefix_tbl[]; !! 43 unsigned int prefix_tbl[0]; 40 }; 44 }; 41 45 42 static unsigned int kmp_find(struct ts_config 46 static unsigned int kmp_find(struct ts_config *conf, struct ts_state *state) 43 { 47 { 44 struct ts_kmp *kmp = ts_config_priv(co 48 struct ts_kmp *kmp = ts_config_priv(conf); 45 unsigned int i, q = 0, text_len, consu 49 unsigned int i, q = 0, text_len, consumed = state->offset; 46 const u8 *text; 50 const u8 *text; 47 const int icase = conf->flags & TS_IGN 51 const int icase = conf->flags & TS_IGNORECASE; 48 52 49 for (;;) { 53 for (;;) { 50 text_len = conf->get_next_bloc 54 text_len = conf->get_next_block(consumed, &text, conf, state); 51 55 52 if (unlikely(text_len == 0)) 56 if (unlikely(text_len == 0)) 53 break; 57 break; 54 58 55 for (i = 0; i < text_len; i++) 59 for (i = 0; i < text_len; i++) { 56 while (q > 0 && kmp->p 60 while (q > 0 && kmp->pattern[q] 57 != (icase ? touppe 61 != (icase ? 
toupper(text[i]) : text[i])) 58 q = kmp->prefi 62 q = kmp->prefix_tbl[q - 1]; 59 if (kmp->pattern[q] 63 if (kmp->pattern[q] 60 == (icase ? touppe 64 == (icase ? toupper(text[i]) : text[i])) 61 q++; 65 q++; 62 if (unlikely(q == kmp- 66 if (unlikely(q == kmp->pattern_len)) { 63 state->offset 67 state->offset = consumed + i + 1; 64 return state-> 68 return state->offset - kmp->pattern_len; 65 } 69 } 66 } 70 } 67 71 68 consumed += text_len; 72 consumed += text_len; 69 } 73 } 70 74 71 return UINT_MAX; 75 return UINT_MAX; 72 } 76 } 73 77 74 static inline void compute_prefix_tbl(const u8 78 static inline void compute_prefix_tbl(const u8 *pattern, unsigned int len, 75 unsigned 79 unsigned int *prefix_tbl, int flags) 76 { 80 { 77 unsigned int k, q; 81 unsigned int k, q; 78 const u8 icase = flags & TS_IGNORECASE 82 const u8 icase = flags & TS_IGNORECASE; 79 83 80 for (k = 0, q = 1; q < len; q++) { 84 for (k = 0, q = 1; q < len; q++) { 81 while (k > 0 && (icase ? toupp 85 while (k > 0 && (icase ? toupper(pattern[k]) : pattern[k]) 82 != (icase ? toupper(patter 86 != (icase ? toupper(pattern[q]) : pattern[q])) 83 k = prefix_tbl[k-1]; 87 k = prefix_tbl[k-1]; 84 if ((icase ? toupper(pattern[k 88 if ((icase ? toupper(pattern[k]) : pattern[k]) 85 == (icase ? toupper(patter 89 == (icase ? 
toupper(pattern[q]) : pattern[q])) 86 k++; 90 k++; 87 prefix_tbl[q] = k; 91 prefix_tbl[q] = k; 88 } 92 } 89 } 93 } 90 94 91 static struct ts_config *kmp_init(const void * 95 static struct ts_config *kmp_init(const void *pattern, unsigned int len, 92 gfp_t gfp_ma 96 gfp_t gfp_mask, int flags) 93 { 97 { 94 struct ts_config *conf; 98 struct ts_config *conf; 95 struct ts_kmp *kmp; 99 struct ts_kmp *kmp; 96 int i; 100 int i; 97 unsigned int prefix_tbl_len = len * si 101 unsigned int prefix_tbl_len = len * sizeof(unsigned int); 98 size_t priv_size = sizeof(*kmp) + len 102 size_t priv_size = sizeof(*kmp) + len + prefix_tbl_len; 99 103 100 conf = alloc_ts_config(priv_size, gfp_ 104 conf = alloc_ts_config(priv_size, gfp_mask); 101 if (IS_ERR(conf)) 105 if (IS_ERR(conf)) 102 return conf; 106 return conf; 103 107 104 conf->flags = flags; 108 conf->flags = flags; 105 kmp = ts_config_priv(conf); 109 kmp = ts_config_priv(conf); 106 kmp->pattern_len = len; 110 kmp->pattern_len = len; 107 compute_prefix_tbl(pattern, len, kmp-> 111 compute_prefix_tbl(pattern, len, kmp->prefix_tbl, flags); 108 kmp->pattern = (u8 *) kmp->prefix_tbl 112 kmp->pattern = (u8 *) kmp->prefix_tbl + prefix_tbl_len; 109 if (flags & TS_IGNORECASE) 113 if (flags & TS_IGNORECASE) 110 for (i = 0; i < len; i++) 114 for (i = 0; i < len; i++) 111 kmp->pattern[i] = toup 115 kmp->pattern[i] = toupper(((u8 *)pattern)[i]); 112 else 116 else 113 memcpy(kmp->pattern, pattern, 117 memcpy(kmp->pattern, pattern, len); 114 118 115 return conf; 119 return conf; 116 } 120 } 117 121 118 static void *kmp_get_pattern(struct ts_config 122 static void *kmp_get_pattern(struct ts_config *conf) 119 { 123 { 120 struct ts_kmp *kmp = ts_config_priv(co 124 struct ts_kmp *kmp = ts_config_priv(conf); 121 return kmp->pattern; 125 return kmp->pattern; 122 } 126 } 123 127 124 static unsigned int kmp_get_pattern_len(struct 128 static unsigned int kmp_get_pattern_len(struct ts_config *conf) 125 { 129 { 126 struct ts_kmp *kmp = ts_config_priv(co 
130 struct ts_kmp *kmp = ts_config_priv(conf); 127 return kmp->pattern_len; 131 return kmp->pattern_len; 128 } 132 } 129 133 130 static struct ts_ops kmp_ops = { 134 static struct ts_ops kmp_ops = { 131 .name = "kmp", 135 .name = "kmp", 132 .find = kmp_find, 136 .find = kmp_find, 133 .init = kmp_init, 137 .init = kmp_init, 134 .get_pattern = kmp_get_pattern, 138 .get_pattern = kmp_get_pattern, 135 .get_pattern_len = kmp_get_pattern_le 139 .get_pattern_len = kmp_get_pattern_len, 136 .owner = THIS_MODULE, 140 .owner = THIS_MODULE, 137 .list = LIST_HEAD_INIT(kmp 141 .list = LIST_HEAD_INIT(kmp_ops.list) 138 }; 142 }; 139 143 140 static int __init init_kmp(void) 144 static int __init init_kmp(void) 141 { 145 { 142 return textsearch_register(&kmp_ops); 146 return textsearch_register(&kmp_ops); 143 } 147 } 144 148 145 static void __exit exit_kmp(void) 149 static void __exit exit_kmp(void) 146 { 150 { 147 textsearch_unregister(&kmp_ops); 151 textsearch_unregister(&kmp_ops); 148 } 152 } 149 153 150 MODULE_DESCRIPTION("Knuth-Morris-Pratt text se << 151 MODULE_LICENSE("GPL"); 154 MODULE_LICENSE("GPL"); 152 155 153 module_init(init_kmp); 156 module_init(init_kmp); 154 module_exit(exit_kmp); 157 module_exit(exit_kmp); 155 158
/*
 * Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
 * TOMOYO® is a registered trademark of NTT DATA CORPORATION.
 */