X-Git-Url: https://git.kaliko.me/?a=blobdiff_plain;f=sima%2Flib%2Fsimastr.py;h=ec82d91510bd57c724e829ff968ff34bb9e27e3e;hb=HEAD;hp=56edbb4d7e68b82a816ad2d30cb6a99698d78864;hpb=78a694ddcd2a6ecc8b2b1fd3c74ee2d938707305;p=mpd-sima.git diff --git a/sima/lib/simastr.py b/sima/lib/simastr.py index 56edbb4..ec82d91 100644 --- a/sima/lib/simastr.py +++ b/sima/lib/simastr.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (c) 2009, 2010, 2013 Jack Kaliko +# Copyright (c) 2009, 2010, 2013, 2021 kaliko # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as @@ -81,7 +81,7 @@ class SimaStr(str): """ diafilter = True leven_ratio = 0.82 - regexp_dict = dict() + regexp_dict = {} # Leading patterns: The Le Les # case-insensitive matching for this RE @@ -102,6 +102,7 @@ class SimaStr(str): def __init__(self, fuzzstr): """ """ + super().__init__() self.orig = str(fuzzstr) self.stripped = str(fuzzstr.strip()) # fuzzy computation @@ -118,18 +119,15 @@ class SimaStr(str): """ sea = SimaStr.reg_lead.search(self.stripped) if sea: - #print sea.groupdict() self.stripped = sea.group('root0') sea = SimaStr.reg_midl.search(self.stripped) if sea: - #print sea.groupdict() self.stripped = str().join([sea.group('root0'), ' ', sea.group('root1')]) sea = SimaStr.reg_trail.search(self.stripped) if sea: - #print sea.groupdict() self.stripped = sea.group('root0') def remove_diacritics(self): @@ -156,40 +154,5 @@ class SimaStr(str): return hash(self) != hash(other) -# Script starts here -if __name__ == "__main__": - import time - print(SimaStr('Kétanoue')) - #from leven import levenshtein_ratio - CASES_LIST = list([ - dict({ - 'got': 'Guns N\' Roses (live)!! !', - 'look for': 'Guns And Roses'}), - dict({ - 'got': 'Jesus & Mary Chains', - 'look for': 'The Jesus and Mary Chains - live'}), - dict({ - 'got': 'Desert sessions', - 'look for': 'The Desert Sessions'}), - dict({ - 'got': 'Têtes Raides', - 'look for': 'Les Têtes Raides'}), - dict({ - 'got': 'Noir Désir', - 'look for': 'Noir Désir'}), - dict({ - 'got': 'No Future', - 'look for': 'Future'})]) - - for case in CASES_LIST[:]: - str0 = case.get('got') - str1 = case.get('look for') - fz_str0 = SimaStr(str0) - fz_str1 = SimaStr(str1) - print(fz_str0, '\n', fz_str1) - print(fz_str0.stripped == fz_str1.stripped) - #print levenshtein_ratio(fz_str0.lower(), fz_str1.lower()) - time.sleep(1) - # VIM MODLINE # vim: ai ts=4 sw=4 sts=4 expandtab