Clean up some linter warnings and errors

[mpd-sima.git] / sima / lib / simastr.py
diff --git a/sima/lib/simastr.py b/sima/lib/simastr.py

index c7162144cd95cb3b3d65b793e3e18871ed16e220..033a994e23f7bea76b99db648570426c8a62e4ad 100644 (file)
--- a/sima/lib/simastr.py
+++ b/sima/lib/simastr.py
@@ -1,7 +1,6 @@
  # -*- coding: utf-8 -*-
  # -*- coding: utf-8 -*-
-
  #
  #
-# Copyright (c) 2009, 2010, 2013 Jack Kaliko <kaliko@azylum.org>
+# Copyright (c) 2009, 2010, 2013 kaliko <kaliko@azylum.org>
  #
  #  This program is free software; you can redistribute it and/or modify
  #  it under the terms of the GNU General Public License as
  #
  #  This program is free software; you can redistribute it and/or modify
  #  it under the terms of the GNU General Public License as
@@ -18,7 +17,7 @@
  #  If not, see <http://www.gnu.org/licenses/>.
  #
  
  #  If not, see <http://www.gnu.org/licenses/>.
  #
  
-"""
+r"""
  SimaStr
  
  Special unicode() subclass to perform fuzzy match on specific strings with
  SimaStr
  
  Special unicode() subclass to perform fuzzy match on specific strings with
@@ -70,7 +69,7 @@ __version__ = '0.4'
  
  # IMPORTS
  import unicodedata
  
  # IMPORTS
  import unicodedata
-from re import (compile, U, I)
+from re import compile as re_compile, U, I
  
  from ..utils.leven import levenshtein_ratio
  
  
  from ..utils.leven import levenshtein_ratio
  
@@ -80,6 +79,8 @@ class SimaStr(str):
      Specific string object for artist names and song titles.
      Here follows some class variables for regex to run on strings.
      """
      Specific string object for artist names and song titles.
      Here follows some class variables for regex to run on strings.
      """
+    diafilter = True
+    leven_ratio = 0.82
      regexp_dict = dict()
  
      # Leading patterns: The Le Les
      regexp_dict = dict()
  
      # Leading patterns: The Le Les
@@ -92,11 +93,11 @@ class SimaStr(str):
      # Trailing patterns: ! ? live
      # TODO: add "concert" key word
      #       add "Live at <somewhere>"
      # Trailing patterns: ! ? live
      # TODO: add "concert" key word
      #       add "Live at <somewhere>"
-    regexp_dict.update({'trail': '([- !?\.]|\(? ?[Ll]ive ?\)?)'})
+    regexp_dict.update({'trail': r'([- !?\.]|\(? ?[Ll]ive ?\)?)'})
  
  
-    reg_lead = compile('^(?P<lead>%(lead)s )(?P<root0>.*)$' % regexp_dict, I | U)
-    reg_midl = compile('^(?P<root0>.*)(?P<mid> %(mid)s )(?P<root1>.*)' % regexp_dict, U)
-    reg_trail = compile('^(?P<root0>.*?)(?P<trail>%(trail)s+$)' % regexp_dict, U)
+    reg_lead = re_compile('^(?P<lead>%(lead)s )(?P<root0>.*)$' % regexp_dict, I | U)
+    reg_midl = re_compile('^(?P<root0>.*)(?P<mid> %(mid)s )(?P<root1>.*)' % regexp_dict, U)
+    reg_trail = re_compile('^(?P<root0>.*?)(?P<trail>%(trail)s+$)' % regexp_dict, U)
  
      def __init__(self, fuzzstr):
          """
  
      def __init__(self, fuzzstr):
          """
@@ -105,7 +106,8 @@ class SimaStr(str):
          self.stripped = str(fuzzstr.strip())
          # fuzzy computation
          self._get_root()
          self.stripped = str(fuzzstr.strip())
          # fuzzy computation
          self._get_root()
-        self.remove_diacritics()
+        if self.__class__.diafilter:
+            self.remove_diacritics()
  
      def __new__(cls, fuzzstr):
          return super(SimaStr, cls).__new__(cls, fuzzstr)
  
      def __new__(cls, fuzzstr):
          return super(SimaStr, cls).__new__(cls, fuzzstr)
@@ -116,21 +118,19 @@ class SimaStr(str):
          """
          sea = SimaStr.reg_lead.search(self.stripped)
          if sea:
          """
          sea = SimaStr.reg_lead.search(self.stripped)
          if sea:
-            #print sea.groupdict()
              self.stripped = sea.group('root0')
  
          sea = SimaStr.reg_midl.search(self.stripped)
          if sea:
              self.stripped = sea.group('root0')
  
          sea = SimaStr.reg_midl.search(self.stripped)
          if sea:
-            #print sea.groupdict()
              self.stripped = str().join([sea.group('root0'), ' ',
                                          sea.group('root1')])
  
          sea = SimaStr.reg_trail.search(self.stripped)
          if sea:
              self.stripped = str().join([sea.group('root0'), ' ',
                                          sea.group('root1')])
  
          sea = SimaStr.reg_trail.search(self.stripped)
          if sea:
-            #print sea.groupdict()
              self.stripped = sea.group('root0')
  
      def remove_diacritics(self):
              self.stripped = sea.group('root0')
  
      def remove_diacritics(self):
+        """converting diacritics"""
          self.stripped = ''.join(x for x in
                                  unicodedata.normalize('NFKD', self.stripped)
                                  if unicodedata.category(x) != 'Mn')
          self.stripped = ''.join(x for x in
                                  unicodedata.normalize('NFKD', self.stripped)
                                  if unicodedata.category(x) != 'Mn')
@@ -145,7 +145,7 @@ class SimaStr(str):
                                     other.stripped.lower())
          if hash(self) == hash(other):
              return True
                                     other.stripped.lower())
          if hash(self) == hash(other):
              return True
-        return levenr >= 0.82
+        return levenr >= self.__class__.leven_ratio
  
      def __ne__(self, other):
          if not isinstance(other, SimaStr):
  
      def __ne__(self, other):
          if not isinstance(other, SimaStr):
@@ -153,40 +153,5 @@ class SimaStr(str):
          return hash(self) != hash(other)
  
  
          return hash(self) != hash(other)
  
  
-# Script starts here
-if __name__ == "__main__":
-    import time
-    print(SimaStr('Kétanoue'))
-    #from leven import levenshtein_ratio
-    CASES_LIST = list([
-        dict({
-                    'got': 'Guns N\' Roses (live)!! !',
-                'look for': 'Guns And Roses'}),
-        dict({
-                     'got': 'Jesus & Mary Chains',
-                'look for': 'The Jesus and Mary Chains - live'}),
-        dict({
-                         'got': 'Desert sessions',
-                    'look for': 'The Desert Sessions'}),
-        dict({
-                         'got': 'Têtes Raides',
-                    'look for': 'Les Têtes Raides'}),
-        dict({
-                         'got': 'Noir Désir',
-                    'look for': 'Noir Désir'}),
-        dict({
-                         'got': 'No Future',
-                    'look for': 'Future'})])
-
-    for case in CASES_LIST[:]:
-        str0 = case.get('got')
-        str1 = case.get('look for')
-        fz_str0 = SimaStr(str0)
-        fz_str1 = SimaStr(str1)
-        print(fz_str0, '\n', fz_str1)
-        print(fz_str0.stripped == fz_str1.stripped)
-        #print levenshtein_ratio(fz_str0.lower(), fz_str1.lower())
-        time.sleep(1)
-
  # VIM MODLINE
  # vim: ai ts=4 sw=4 sts=4 expandtab
  # VIM MODLINE
  # vim: ai ts=4 sw=4 sts=4 expandtab