def fuzzy_find(self, art):
    """
    Look up an artist name in the music library, tolerating spelling drift.

    The lookup proceeds in stages and stops at the first one that settles
    the question:

    1. Exact membership of the original string in ``self.list('artist')``.
       In that case the ``SimaStr`` wrapper built from ``art`` is returned
       (not the library string) as a one-element list.
    2. ``get_close_matches`` shortlists library names close to the original
       string; an empty shortlist yields ``[]``.
    3. Names made of a single word shorter than 8 characters are only
       compared lowercased against the shortlist; every hit is returned.
    4. Otherwise each shortlisted name is tested in order: a lowercased
       hit ends the search immediately (returning everything collected so
       far plus that name); else the name is kept when it equals the
       ``SimaStr`` object or when the levenshtein ratio of the stripped,
       lowercased forms is at least 0.82.

    Returns a list of matching artist names, possibly empty.

    TODO: proceed crosschecking even when an artist matched !!!
          Finding "The Doors" as "The Doors" does not mean there are no
          remaining entries such as "Doors" :/
          Not straightforward, probably needs heavy refactoring.
    """
    artist = SimaStr(art)
    library_artists = self.list('artist')

    # Stage 1: the literal name is already known to the library.
    if artist.orig in library_artists:
        self.log.debug('found exact match for "%s"' % artist)
        return [artist]

    # Stage 2: shortlist of near spellings; nothing close means no match.
    # NOTE(review): 50 / 0.73 are presumably difflib's n / cutoff -- confirm.
    candidates = get_close_matches(artist.orig, library_artists, 50, 0.73)
    if not candidates:
        return []
    self.log.debug('found close match for "%s": %s' %
                   (artist, '/'.join(candidates)))

    found = []
    # Per the original author, SimaStr().lower gives a regular string, so
    # comparisons against it are ordinary string comparisons.
    lowered = artist.lower()

    # Stage 3: short single-word names are too ambiguous for fuzzy
    # matching, only a lowercased comparison is trusted.
    single_word = ' ' not in artist.orig
    if single_word and len(artist) < 8:
        for candidate in candidates:
            if lowered == candidate.lower():
                found.append(candidate)
                self.log.debug('"%s" matches "%s".' % (candidate, artist))
        return found

    # Stage 4: full crosscheck of every shortlisted name.
    for candidate in candidates:
        if lowered == candidate.lower():
            # Case-insensitive hit: stop looking any further.
            found.append(candidate)
            self.log.debug('"%s" matches "%s".' % (candidate, artist))
            return found

        ratio = levenshtein_ratio(artist.stripped.lower(),
                                  SimaStr(candidate).stripped.lower())

        # SimaStr.__eq__ is used here, not the regular string comparison.
        if artist == candidate:
            found.append(candidate)
            self.log.info('"%s" quite probably matches "%s" (SimaStr)' %
                          (candidate, artist))
            continue

        if ratio >= 0.82:  # PARAM
            found.append(candidate)
            self.log.debug('FZZZ: "%s" should match "%s" (lr=%1.3f)' %
                           (candidate, artist, ratio))
            continue

        self.log.debug('FZZZ: "%s" does not match "%s" (lr=%1.3f)' %
                       (candidate, artist, ratio))
    return found
+