X-Git-Url: https://git.kaliko.me/?a=blobdiff_plain;f=sima%2Flib%2Fsimastr.py;fp=sima%2Flib%2Fsimastr.py;h=56edbb4d7e68b82a816ad2d30cb6a99698d78864;hb=78a694ddcd2a6ecc8b2b1fd3c74ee2d938707305;hp=9dacc45d1ea3af6302a1f56b08c9e6049b117a9b;hpb=300d9624d0c3bf2218a8df59eff689f5b78a2025;p=mpd-sima.git diff --git a/sima/lib/simastr.py b/sima/lib/simastr.py index 9dacc45..56edbb4 100644 --- a/sima/lib/simastr.py +++ b/sima/lib/simastr.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - # # Copyright (c) 2009, 2010, 2013 Jack Kaliko # @@ -18,7 +17,7 @@ # If not, see . # -""" +r""" SimaStr Special unicode() subclass to perform fuzzy match on specific strings with @@ -70,7 +69,7 @@ __version__ = '0.4' # IMPORTS import unicodedata -from re import (compile, U, I) +from re import compile as re_compile, U, I from ..utils.leven import levenshtein_ratio @@ -94,11 +93,11 @@ class SimaStr(str): # Trailing patterns: ! ? live # TODO: add "concert" key word # add "Live at " - regexp_dict.update({'trail': '([- !?\.]|\(? ?[Ll]ive ?\)?)'}) + regexp_dict.update({'trail': r'([- !?\.]|\(? ?[Ll]ive ?\)?)'}) - reg_lead = compile('^(?P%(lead)s )(?P.*)$' % regexp_dict, I | U) - reg_midl = compile('^(?P.*)(?P %(mid)s )(?P.*)' % regexp_dict, U) - reg_trail = compile('^(?P.*?)(?P%(trail)s+$)' % regexp_dict, U) + reg_lead = re_compile('^(?P%(lead)s )(?P.*)$' % regexp_dict, I | U) + reg_midl = re_compile('^(?P.*)(?P %(mid)s )(?P.*)' % regexp_dict, U) + reg_trail = re_compile('^(?P.*?)(?P%(trail)s+$)' % regexp_dict, U) def __init__(self, fuzzstr): """ @@ -108,7 +107,7 @@ class SimaStr(str): # fuzzy computation self._get_root() if self.__class__.diafilter: - self.remove_diacritics() + self.remove_diacritics() def __new__(cls, fuzzstr): return super(SimaStr, cls).__new__(cls, fuzzstr) @@ -134,6 +133,7 @@ class SimaStr(str): self.stripped = sea.group('root0') def remove_diacritics(self): + """converting diacritics""" self.stripped = ''.join(x for x in unicodedata.normalize('NFKD', self.stripped) if unicodedata.category(x) != 'Mn')