X-Git-Url: https://git.kaliko.me/?a=blobdiff_plain;f=sima%2Flib%2Fsimastr.py;h=9dacc45d1ea3af6302a1f56b08c9e6049b117a9b;hb=9ec2e9036e1f0fe67e8ddd7e8fb7f91a2e86cd62;hp=56cd2423a0474b540df2c4f010ee89f017585b0e;hpb=1d41464ccb6ff66441947eef0305518e3ce79a77;p=mpd-sima.git diff --git a/sima/lib/simastr.py b/sima/lib/simastr.py index 56cd242..9dacc45 100644 --- a/sima/lib/simastr.py +++ b/sima/lib/simastr.py @@ -80,6 +80,8 @@ class SimaStr(str): Specific string object for artist names and song titles. Here follows some class variables for regex to run on strings. """ + diafilter = True + leven_ratio = 0.82 regexp_dict = dict() # Leading patterns: The Le Les @@ -101,12 +103,15 @@ class SimaStr(str): def __init__(self, fuzzstr): """ """ - super().__init__(fuzzstr) self.orig = str(fuzzstr) self.stripped = str(fuzzstr.strip()) # fuzzy computation self._get_root() - self.remove_diacritics() + if self.__class__.diafilter: + self.remove_diacritics() + + def __new__(cls, fuzzstr): + return super(SimaStr, cls).__new__(cls, fuzzstr) def _get_root(self): """ @@ -143,7 +148,7 @@ class SimaStr(str): other.stripped.lower()) if hash(self) == hash(other): return True - return levenr >= 0.82 + return levenr >= self.__class__.leven_ratio def __ne__(self, other): if not isinstance(other, SimaStr):