# -*- coding: utf-8 -*-
-
#
# Copyright (c) 2009, 2010, 2013 Jack Kaliko <kaliko@azylum.org>
#
# If not, see <http://www.gnu.org/licenses/>.
#
-"""
+r"""
SimaStr
Special unicode() subclass to perform fuzzy match on specific strings with
# IMPORTS
import unicodedata
-from re import (compile, U, I)
+from re import compile as re_compile, U, I
from ..utils.leven import levenshtein_ratio
Specific string object for artist names and song titles.
Here follows some class variables for regex to run on strings.
"""
+ diafilter = True
+ leven_ratio = 0.82
regexp_dict = dict()
# Leading patterns: The Le Les
# Trailing patterns: ! ? live
# TODO: add "concert" key word
# add "Live at <somewhere>"
- regexp_dict.update({'trail': '([- !?\.]|\(? ?[Ll]ive ?\)?)'})
+ regexp_dict.update({'trail': r'([- !?\.]|\(? ?[Ll]ive ?\)?)'})
- reg_lead = compile('^(?P<lead>%(lead)s )(?P<root0>.*)$' % regexp_dict, I | U)
- reg_midl = compile('^(?P<root0>.*)(?P<mid> %(mid)s )(?P<root1>.*)' % regexp_dict, U)
- reg_trail = compile('^(?P<root0>.*?)(?P<trail>%(trail)s+$)' % regexp_dict, U)
+ reg_lead = re_compile('^(?P<lead>%(lead)s )(?P<root0>.*)$' % regexp_dict, I | U)
+ reg_midl = re_compile('^(?P<root0>.*)(?P<mid> %(mid)s )(?P<root1>.*)' % regexp_dict, U)
+ reg_trail = re_compile('^(?P<root0>.*?)(?P<trail>%(trail)s+$)' % regexp_dict, U)
def __init__(self, fuzzstr):
"""
self.stripped = str(fuzzstr.strip())
# fuzzy computation
self._get_root()
- self.remove_diacritics()
+ if self.__class__.diafilter:
+ self.remove_diacritics()
def __new__(cls, fuzzstr):
return super(SimaStr, cls).__new__(cls, fuzzstr)
self.stripped = sea.group('root0')
def remove_diacritics(self):
+ """NFKD-normalize self.stripped and drop nonspacing marks (diacritics)."""
self.stripped = ''.join(x for x in
unicodedata.normalize('NFKD', self.stripped)
if unicodedata.category(x) != 'Mn')
other.stripped.lower())
if hash(self) == hash(other):
return True
- return levenr >= 0.82
+ return levenr >= self.__class__.leven_ratio
def __ne__(self, other):
if not isinstance(other, SimaStr):