def _cross_check_artist(self, art):
    """
    Look up an artist name among the artists cached from the music library.

    The lookup goes through the following stages and stops at the first
    one that settles the question:

    1. exact match of the original string against the cached artist list;
    2. pre-selection of close candidates with get_close_matches
       (presumably difflib's: at most 50 candidates, cutoff 0.73); no
       candidate at all means no match;
    3. for a single-word name shorter than 8 characters, only a lowercased
       comparison against the candidates is performed;
    4. otherwise each candidate is tested in turn: lowercased comparison
       (the search stops right there on a hit), then SimaStr equality,
       then a levenshtein ratio of at least 0.82 computed on the
       stripped, lowercased names.

    :param art: artist name to look for.
    :returns: list of matching artists, empty when nothing matched. On an
        exact match the list holds the SimaStr built from ``art``; on any
        other path it holds candidates returned by get_close_matches.

    TODO: keep crosschecking even once an artist matched. Finding
          "The Doors" as "The Doors" does not mean there are no remaining
          entries filed as "Doors". Not straightforward, probably needs
          heavy refactoring.
    """
    wanted = SimaStr(art)
    library = self._cache.get('artists')

    # Stage 1: the untouched string is already part of the artist list.
    if wanted.orig in library:
        self.log.debug('found exact match for "%s"' % wanted)
        return [wanted]

    # Stage 2: narrow the artist list down to close candidates.
    candidates = get_close_matches(wanted.orig, library, 50, 0.73)
    if not candidates:
        return []
    self.log.debug('found close match for "%s": %s' %
                   (wanted, '/'.join(candidates)))

    found = []
    # Plain str lowercasing (not SimaStr comparison); loop-invariant.
    wanted_lower = wanted.lower()

    # Stage 3: short single-word names are too ambiguous for fuzzy
    # matching, so only the lowercased comparison is applied to them.
    single_word = ' ' not in wanted.orig
    if single_word and len(wanted) < 8:
        for candidate in candidates:
            if wanted_lower != candidate.lower():
                continue
            found.append(candidate)
            self.log.debug('"%s" matches "%s".' % (candidate, wanted))
        return found

    # Stage 4: longer or multi-word names go through the full chain.
    ratio_threshold = 0.82  # PARAM
    for candidate in candidates:
        # Regular string comparison first; a hit ends the search at once,
        # returning whatever was collected so far plus this candidate.
        if wanted_lower == candidate.lower():
            found.append(candidate)
            self.log.debug('"%s" matches "%s".' % (candidate, wanted))
            return found

        ratio = levenshtein_ratio(wanted.stripped.lower(),
                                  SimaStr(candidate).stripped.lower())

        # SimaStr.__eq__ here, not a regular string comparison.
        if wanted == candidate:
            found.append(candidate)
            self.log.info('"%s" quite probably matches "%s" (SimaStr)' %
                          (candidate, wanted))
            continue

        if ratio >= ratio_threshold:
            found.append(candidate)
            self.log.debug('FZZZ: "%s" should match "%s" (lr=%1.3f)' %
                           (candidate, wanted, ratio))
            continue

        self.log.debug('FZZZ: "%s" does not match "%s" (lr=%1.3f)' %
                       (candidate, wanted, ratio))
    return found
