# -*- coding: utf-8 -*-
-
#
# Copyright (c) 2009, 2010, 2013 Jack Kaliko <kaliko@azylum.org>
#
# If not, see <http://www.gnu.org/licenses/>.
#
-"""
+r"""
SimaStr
Special unicode() subclass to perform fuzzy match on specific strings with
# IMPORTS
import unicodedata
-from re import (compile, U, I)
+from re import compile as re_compile, U, I
from ..utils.leven import levenshtein_ratio
# Trailing patterns: ! ? live
# TODO: add "concert" keyword
# add "Live at <somewhere>"
- regexp_dict.update({'trail': '([- !?\.]|\(? ?[Ll]ive ?\)?)'})
+ regexp_dict.update({'trail': r'([- !?\.]|\(? ?[Ll]ive ?\)?)'})
- reg_lead = compile('^(?P<lead>%(lead)s )(?P<root0>.*)$' % regexp_dict, I | U)
- reg_midl = compile('^(?P<root0>.*)(?P<mid> %(mid)s )(?P<root1>.*)' % regexp_dict, U)
- reg_trail = compile('^(?P<root0>.*?)(?P<trail>%(trail)s+$)' % regexp_dict, U)
+ reg_lead = re_compile('^(?P<lead>%(lead)s )(?P<root0>.*)$' % regexp_dict, I | U)
+ reg_midl = re_compile('^(?P<root0>.*)(?P<mid> %(mid)s )(?P<root1>.*)' % regexp_dict, U)
+ reg_trail = re_compile('^(?P<root0>.*?)(?P<trail>%(trail)s+$)' % regexp_dict, U)
def __init__(self, fuzzstr):
"""
# fuzzy computation
self._get_root()
if self.__class__.diafilter:
- self.remove_diacritics()
+ self.remove_diacritics()
def __new__(cls, fuzzstr):
    """Create the instance by delegating to the parent class ``__new__``.

    ``fuzzstr`` is forwarded unchanged as the value of the new object.
    """
    parent = super(SimaStr, cls)
    return parent.__new__(cls, fuzzstr)
self.stripped = sea.group('root0')
def remove_diacritics(self):
    """Strip diacritics from ``self.stripped`` in place.

    The string is decomposed with Unicode NFKD normalization, then every
    character whose Unicode category is ``Mn`` (nonspacing mark) is
    dropped. The result is stored back into ``self.stripped``; nothing is
    returned.
    """
    # NFKD splits an accented letter into its base letter followed by the
    # combining mark(s), so the marks can be filtered out one by one.
    decomposed = unicodedata.normalize('NFKD', self.stripped)
    self.stripped = ''.join(char for char in decomposed
                            if unicodedata.category(char) != 'Mn')