~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/tools/perf/util/levenshtein.c

Version: ~ [ linux-6.11.5 ] ~ [ linux-6.10.14 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.58 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.114 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.169 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.228 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.284 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.322 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0
  2 #include "levenshtein.h"
  3 #include <errno.h>
  4 #include <stdlib.h>
  5 #include <string.h>
  6 
  7 /*
  8  * This function implements the Damerau-Levenshtein algorithm to
  9  * calculate a distance between strings.
 10  *
 11  * Basically, it says how many letters need to be swapped, substituted,
 12  * deleted from, or added to string1, at least, to get string2.
 13  *
 14  * The idea is to build a distance matrix for the substrings of both
 15  * strings.  To avoid a large space complexity, only the last three rows
 16  * are kept in memory (if swaps had the same or higher cost as one deletion
 17  * plus one insertion, only two rows would be needed).
 18  *
 19  * At any stage, "i + 1" denotes the length of the current substring of
 20  * string1 that the distance is calculated for.
 21  *
 22  * row2 holds the current row, row1 the previous row (i.e. for the substring
 23  * of string1 of length "i"), and row0 the row before that.
 24  *
 25  * In other words, at the start of the big loop, row2[j + 1] contains the
 26  * Damerau-Levenshtein distance between the substring of string1 of length
 27  * "i" and the substring of string2 of length "j + 1".
 28  *
 29  * All the big loop does is determine the partial minimum-cost paths.
 30  *
 31  * It does so by calculating the costs of the path ending in characters
 32  * i (in string1) and j (in string2), respectively, given that the last
 33  * operation is a substitution, a swap, a deletion, or an insertion.
 34  *
 35  * This implementation allows the costs to be weighted:
 36  *
 37  * - w (as in "sWap")
 38  * - s (as in "Substitution")
 39  * - a (for insertion, AKA "Add")
 40  * - d (as in "Deletion")
 41  *
 42  * Note that this algorithm calculates a distance _iff_ d == a.
 43  */
 44 int levenshtein(const char *string1, const char *string2,
 45                 int w, int s, int a, int d)
 46 {
 47         int len1 = strlen(string1), len2 = strlen(string2);
 48         int *row0 = malloc(sizeof(int) * (len2 + 1));
 49         int *row1 = malloc(sizeof(int) * (len2 + 1));
 50         int *row2 = malloc(sizeof(int) * (len2 + 1));
 51         int i, j;
 52 
 53         for (j = 0; j <= len2; j++)
 54                 row1[j] = j * a;
 55         for (i = 0; i < len1; i++) {
 56                 int *dummy;
 57 
 58                 row2[0] = (i + 1) * d;
 59                 for (j = 0; j < len2; j++) {
 60                         /* substitution */
 61                         row2[j + 1] = row1[j] + s * (string1[i] != string2[j]);
 62                         /* swap */
 63                         if (i > 0 && j > 0 && string1[i - 1] == string2[j] &&
 64                                         string1[i] == string2[j - 1] &&
 65                                         row2[j + 1] > row0[j - 1] + w)
 66                                 row2[j + 1] = row0[j - 1] + w;
 67                         /* deletion */
 68                         if (row2[j + 1] > row1[j + 1] + d)
 69                                 row2[j + 1] = row1[j + 1] + d;
 70                         /* insertion */
 71                         if (row2[j + 1] > row2[j] + a)
 72                                 row2[j + 1] = row2[j] + a;
 73                 }
 74 
 75                 dummy = row0;
 76                 row0 = row1;
 77                 row1 = row2;
 78                 row2 = dummy;
 79         }
 80 
 81         i = row1[len2];
 82         free(row0);
 83         free(row1);
 84         free(row2);
 85 
 86         return i;
 87 }
 88 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php