1 # -*- coding: utf-8 -*-
2 # Copyright (c) 2009, 2010, 2013 Jack Kaliko <kaliko@azylum.org>
4 # This file is part of sima
6 # sima is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # sima is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with sima. If not, see <http://www.gnu.org/licenses/>.
19 """Computes levenshtein distance/ratio"""
21 def levenshtein(a_st, b_st):
22 """Computes the Levenshtein distance between two strings."""
23 n_a, m_b = len(a_st), len(b_st)
25 # Make sure n <= m, to use O(min(n_a,m_b)) space
26 a_st, b_st = b_st, a_st
29 current = list(range(n_a+1))
30 for i in range(1, m_b+1):
31 previous, current = current, [i]+[0]*n_a
32 for j in range(1, n_a+1):
33 add, delete = previous[j] + 1, current[j-1] + 1
34 change = previous[j-1]
35 if a_st[j-1] != b_st[i-1]:
37 current[j] = min(add, delete, change)
41 def levenshtein_ratio(string, strong):
43 Compute levenshtein ratio.
44 Ratio = levenshtein distance / lenght of longer string
45 The longer string length is the upper bound of levenshtein distance.
47 lev_dist = levenshtein(string, strong)
48 max_len = max(len(string), len(strong))
49 ratio = 1 - (float(lev_dist) / float(max_len))
54 # vim: ai ts=4 sw=4 sts=4 expandtab