ci: Fixed unittest

[mpd-sima.git] / sima / lib / simastr.py
diff --git a/sima/lib/simastr.py b/sima/lib/simastr.py

index c7162144cd95cb3b3d65b793e3e18871ed16e220..cdb0195fbb7eac9a19dce6000ded3342deeb0668 100644 (file)
--- a/sima/lib/simastr.py
+++ b/sima/lib/simastr.py
@@ -1,7 +1,6 @@
  # -*- coding: utf-8 -*-
-
  #
-# Copyright (c) 2009, 2010, 2013 Jack Kaliko <kaliko@azylum.org>
+# Copyright (c) 2009, 2010, 2013 kaliko <kaliko@azylum.org>
  #
  #  This program is free software; you can redistribute it and/or modify
  #  it under the terms of the GNU General Public License as
@@ -18,7 +17,7 @@
  #  If not, see <http://www.gnu.org/licenses/>.
  #
  
-"""
+r"""
  SimaStr
  
  Special unicode() subclass to perform fuzzy match on specific strings with
@@ -70,7 +69,7 @@ __version__ = '0.4'
  
  # IMPORTS
  import unicodedata
-from re import (compile, U, I)
+from re import compile as re_compile, U, I
  
  from ..utils.leven import levenshtein_ratio
  
@@ -80,6 +79,8 @@ class SimaStr(str):
      Specific string object for artist names and song titles.
      Here follows some class variables for regex to run on strings.
      """
+    diafilter = True
+    leven_ratio = 0.82
      regexp_dict = dict()
  
      # Leading patterns: The Le Les
@@ -92,11 +93,11 @@ class SimaStr(str):
      # Trailing patterns: ! ? live
      # TODO: add "concert" key word
      #       add "Live at <somewhere>"
-    regexp_dict.update({'trail': '([- !?\.]|\(? ?[Ll]ive ?\)?)'})
+    regexp_dict.update({'trail': r'([- !?\.]|\(? ?[Ll]ive ?\)?)'})
  
-    reg_lead = compile('^(?P<lead>%(lead)s )(?P<root0>.*)$' % regexp_dict, I | U)
-    reg_midl = compile('^(?P<root0>.*)(?P<mid> %(mid)s )(?P<root1>.*)' % regexp_dict, U)
-    reg_trail = compile('^(?P<root0>.*?)(?P<trail>%(trail)s+$)' % regexp_dict, U)
+    reg_lead = re_compile('^(?P<lead>%(lead)s )(?P<root0>.*)$' % regexp_dict, I | U)
+    reg_midl = re_compile('^(?P<root0>.*)(?P<mid> %(mid)s )(?P<root1>.*)' % regexp_dict, U)
+    reg_trail = re_compile('^(?P<root0>.*?)(?P<trail>%(trail)s+$)' % regexp_dict, U)
  
      def __init__(self, fuzzstr):
          """
@@ -105,7 +106,8 @@ class SimaStr(str):
          self.stripped = str(fuzzstr.strip())
          # fuzzy computation
          self._get_root()
-        self.remove_diacritics()
+        if self.__class__.diafilter:
+            self.remove_diacritics()
  
      def __new__(cls, fuzzstr):
          return super(SimaStr, cls).__new__(cls, fuzzstr)
@@ -131,6 +133,7 @@ class SimaStr(str):
              self.stripped = sea.group('root0')
  
      def remove_diacritics(self):
+        """converting diacritics"""
          self.stripped = ''.join(x for x in
                                  unicodedata.normalize('NFKD', self.stripped)
                                  if unicodedata.category(x) != 'Mn')
@@ -145,7 +148,7 @@ class SimaStr(str):
                                     other.stripped.lower())
          if hash(self) == hash(other):
              return True
-        return levenr >= 0.82
+        return levenr >= self.__class__.leven_ratio
  
      def __ne__(self, other):
          if not isinstance(other, SimaStr):
@@ -153,40 +156,5 @@ class SimaStr(str):
          return hash(self) != hash(other)
  
  
-# Script starts here
-if __name__ == "__main__":
-    import time
-    print(SimaStr('Kétanoue'))
-    #from leven import levenshtein_ratio
-    CASES_LIST = list([
-        dict({
-                    'got': 'Guns N\' Roses (live)!! !',
-                'look for': 'Guns And Roses'}),
-        dict({
-                     'got': 'Jesus & Mary Chains',
-                'look for': 'The Jesus and Mary Chains - live'}),
-        dict({
-                         'got': 'Desert sessions',
-                    'look for': 'The Desert Sessions'}),
-        dict({
-                         'got': 'Têtes Raides',
-                    'look for': 'Les Têtes Raides'}),
-        dict({
-                         'got': 'Noir Désir',
-                    'look for': 'Noir Désir'}),
-        dict({
-                         'got': 'No Future',
-                    'look for': 'Future'})])
-
-    for case in CASES_LIST[:]:
-        str0 = case.get('got')
-        str1 = case.get('look for')
-        fz_str0 = SimaStr(str0)
-        fz_str1 = SimaStr(str1)
-        print(fz_str0, '\n', fz_str1)
-        print(fz_str0.stripped == fz_str1.stripped)
-        #print levenshtein_ratio(fz_str0.lower(), fz_str1.lower())
-        time.sleep(1)
-
  # VIM MODLINE
  # vim: ai ts=4 sw=4 sts=4 expandtab