# -*- coding: utf-8 -*-
-
#
# Copyright (c) 2009, 2010, 2013 Jack Kaliko <kaliko@azylum.org>
#
# If not, see <http://www.gnu.org/licenses/>.
#
-"""
+r"""
SimaStr
Special unicode() subclass to perform fuzzy match on specific strings with
# IMPORTS
import unicodedata
-from re import (compile, U, I)
+from re import compile as re_compile, U, I
from ..utils.leven import levenshtein_ratio
# Trailing patterns: ! ? live
# TODO: add the "concert" keyword
#       and the "Live at <somewhere>" form
- regexp_dict.update({'trail': '([- !?\.]|\(? ?[Ll]ive ?\)?)'})
+ regexp_dict.update({'trail': r'([- !?\.]|\(? ?[Ll]ive ?\)?)'})
- reg_lead = compile('^(?P<lead>%(lead)s )(?P<root0>.*)$' % regexp_dict, I | U)
- reg_midl = compile('^(?P<root0>.*)(?P<mid> %(mid)s )(?P<root1>.*)' % regexp_dict, U)
- reg_trail = compile('^(?P<root0>.*?)(?P<trail>%(trail)s+$)' % regexp_dict, U)
+ reg_lead = re_compile('^(?P<lead>%(lead)s )(?P<root0>.*)$' % regexp_dict, I | U)
+ reg_midl = re_compile('^(?P<root0>.*)(?P<mid> %(mid)s )(?P<root1>.*)' % regexp_dict, U)
+ reg_trail = re_compile('^(?P<root0>.*?)(?P<trail>%(trail)s+$)' % regexp_dict, U)
def __init__(self, fuzzstr):
"""
# fuzzy computation
self._get_root()
if self.__class__.diafilter:
- self.remove_diacritics()
+ self.remove_diacritics()
def __new__(cls, fuzzstr):
    """Build the instance by delegating to the parent class ``__new__``.

    *fuzzstr* is forwarded unchanged as the value of the new object.
    """
    parent = super(SimaStr, cls)
    return parent.__new__(cls, fuzzstr)
self.stripped = sea.group('root0')
def remove_diacritics(self):
    """Drop combining marks from ``self.stripped``, in place.

    The text is first decomposed with NFKD normalization; every code point
    whose Unicode category is ``Mn`` is then discarded and the remaining
    characters are joined back and stored in ``self.stripped``.
    """
    decomposed = unicodedata.normalize('NFKD', self.stripped)
    kept = [char for char in decomposed
            if unicodedata.category(char) != 'Mn']
    self.stripped = ''.join(kept)
return hash(self) != hash(other)
-# Script starts here
-if __name__ == "__main__":
- import time
- print(SimaStr('Kétanoue'))
- #from leven import levenshtein_ratio
- CASES_LIST = list([
- dict({
- 'got': 'Guns N\' Roses (live)!! !',
- 'look for': 'Guns And Roses'}),
- dict({
- 'got': 'Jesus & Mary Chains',
- 'look for': 'The Jesus and Mary Chains - live'}),
- dict({
- 'got': 'Desert sessions',
- 'look for': 'The Desert Sessions'}),
- dict({
- 'got': 'Têtes Raides',
- 'look for': 'Les Têtes Raides'}),
- dict({
- 'got': 'Noir Désir',
- 'look for': 'Noir Désir'}),
- dict({
- 'got': 'No Future',
- 'look for': 'Future'})])
-
- for case in CASES_LIST[:]:
- str0 = case.get('got')
- str1 = case.get('look for')
- fz_str0 = SimaStr(str0)
- fz_str1 = SimaStr(str1)
- print(fz_str0, '\n', fz_str1)
- print(fz_str0.stripped == fz_str1.stripped)
- #print levenshtein_ratio(fz_str0.lower(), fz_str1.lower())
- time.sleep(1)
-
# VIM MODLINE
# vim: ai ts=4 sw=4 sts=4 expandtab