1 # -*- coding: utf-8 -*-
3 # Copyright (c) 2009, 2010, 2013 Jack Kaliko <kaliko@azylum.org>
5 # This file is part of sima
7 # sima is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
12 # sima is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with sima. If not, see <http://www.gnu.org/licenses/>.
22 def levenshtein(a_st, b_st):
23 """Computes the Levenshtein distance between two strings."""
24 n_a, m_b = len(a_st), len(b_st)
26 # Make sure n <= m, to use O(min(n_a,m_b)) space
27 a_st, b_st = b_st, a_st
30 current = list(range(n_a+1))
31 for i in range(1, m_b+1):
32 previous, current = current, [i]+[0]*n_a
33 for j in range(1, n_a+1):
34 add, delete = previous[j] + 1, current[j-1] + 1
35 change = previous[j-1]
36 if a_st[j-1] != b_st[i-1]:
38 current[j] = min(add, delete, change)
42 def levenshtein_ratio(string, strong):
44 Compute levenshtein ratio.
45 Ratio = levenshtein distance / lenght of longer string
46 The longer string length is the upper bound of levenshtein distance.
48 lev_dist = levenshtein(string, strong)
49 max_len = max(len(string), len(strong))
50 ratio = 1 - (float(lev_dist) / float(max_len))
55 # vim: ai ts=4 sw=4 sts=4 expandtab