Skip to content

Commit 1746258

Browse files
committed
Add Damerau-Levenshtein distance algorithm
1 parent ed19b1c commit 1746258

File tree

1 file changed

+73
-0
lines changed

1 file changed

+73
-0
lines changed
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
"""
2+
This script is an implementation of the Damerau-Levenshtein distance algorithm.
3+
4+
It's an algorithm that measures the edit distance between two string sequences.
5+
6+
More information about this algorithm can be found in this wikipedia article:
7+
https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
8+
"""
9+
10+
11+
def damerau_levenshtein_distance(first_string: str, second_string: str) -> int:
    """
    Compute the true (unrestricted) Damerau-Levenshtein distance between
    two strings.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions, plus transpositions of two adjacent
    characters, needed to turn ``first_string`` into ``second_string``.
    Unlike the restricted "optimal string alignment" variant, characters
    may be inserted or deleted between the two transposed characters, so
    "ca" -> "abc" costs 2 ("ca" -> "ac" -> "abc") rather than 3.

    Parameters:
        first_string (string): The first string
        second_string (string): The second string

    Returns:
        distance (int): The edit distance between the first and second strings

    >>> damerau_levenshtein_distance("cat", "cut")
    1
    >>> damerau_levenshtein_distance("kitten", "sitting")
    3
    >>> damerau_levenshtein_distance("hello", "world")
    4
    >>> damerau_levenshtein_distance("book", "back")
    2
    >>> damerau_levenshtein_distance("container", "containment")
    3
    >>> damerau_levenshtein_distance("ab", "ba")
    1
    >>> damerau_levenshtein_distance("ca", "abc")
    2
    >>> damerau_levenshtein_distance("", "abc")
    3
    """
    first_len = len(first_string)
    second_len = len(second_string)

    # Upper bound on any distance; used as an "infinite" sentinel border so
    # that transpositions referring to a character not yet seen never win.
    max_distance = first_len + second_len

    # distances[i + 1][j + 1] is the distance between first_string[:i] and
    # second_string[:j]. Row 0 and column 0 form the sentinel border.
    distances = [[max_distance] * (second_len + 2) for _ in range(first_len + 2)]
    for i in range(first_len + 1):
        distances[i + 1][1] = i
    for j in range(second_len + 1):
        distances[1][j + 1] = j

    # Maps each character to the last (1-based) position in first_string at
    # which it occurred among the rows processed so far.
    last_row_seen: dict[str, int] = {}

    for i in range(1, first_len + 1):
        first_char = first_string[i - 1]
        # Last (1-based) column in this row where the characters matched.
        last_match_col = 0

        for j in range(1, second_len + 1):
            second_char = second_string[j - 1]

            # Candidate transposition partner positions (0 means "none").
            swap_row = last_row_seen.get(second_char, 0)
            swap_col = last_match_col

            if first_char == second_char:
                cost = 0
                last_match_col = j
            else:
                cost = 1

            distances[i + 1][j + 1] = min(
                distances[i][j + 1] + 1,  # Deletion
                distances[i + 1][j] + 1,  # Insertion
                distances[i][j] + cost,  # Substitution
                # Transposition: delete the characters between the swapped
                # pair in first_string, swap, then insert the characters
                # between the pair in second_string.
                distances[swap_row][swap_col]
                + (i - swap_row - 1)
                + 1
                + (j - swap_col - 1),
            )

        last_row_seen[first_char] = i

    return distances[first_len + 1][second_len + 1]
68+
69+
70+
if __name__ == "__main__":
    # When executed as a script, run the doctest examples embedded in the
    # docstrings of this module.
    from doctest import testmod

    testmod()

0 commit comments

Comments
 (0)