# -*- coding: utf-8 -*-
# Copyright (c) 2009-2014 kaliko
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# [...] (the remaining lines of the license notice were not visible here)
#

"""
Consume Last.fm web service
"""

__version__ = '0.5.1'
__author__ = 'Jack Kaliko'


from sima import LFM
from sima.lib.meta import Artist
from sima.lib.track import Track

from sima.lib.http import HttpClient
from sima.utils.utils import WSError, WSNotFound
from sima.utils.utils import getws
if len(LFM.get('apikey')) == 43:  # simple hack allowing imp.reload
    getws(LFM)


class SimaFM:
    """Last.fm http client

    Builds request payloads for the Last.fm web service, sends them through
    an :class:`HttpClient` and turns the JSON answers into
    :class:`sima.lib.meta.Artist` / :class:`sima.lib.track.Track` objects.
    """
    root_url = 'http://{host}/{version}/'.format(**LFM)
    name = 'Last.fm'
    cache = False
    """HTTP cache to use, in memory or persistent.

    :param BaseCache cache: Set a cache, defaults to `False`.
    """
    # Defined on the class: every instance hands this same dict to its
    # HttpClient unless a subclass/instance overrides the attribute.
    stats = {'etag': 0,
             'ccontrol': 0,
             'total': 0}

    # Short aliases accepted by _forge_payload -> Last.fm API method names.
    _METHODS = {'similar': 'artist.getsimilar',
                'top': 'artist.gettoptracks',
                'track': 'track.getsimilar',
                'info': 'artist.getinfo',
                }
    # Keys dropped from each Last.fm track entry before building a Track.
    _IGNORED_TRACK_KEYS = frozenset(['artist', 'streamable', 'listeners',
                                     'url', 'image', '@attr'])

    def __init__(self):
        self.http = HttpClient(cache=self.cache, stats=self.stats)
        # Last artist handed to _forge_payload, used in error messages.
        self.artist = None

    def _controls_answer(self, ans):
        """Controls answer.

        :param dict ans: decoded JSON answer from the web service
        :returns: `True` when the answer carries no ``error`` key
        :raises WSNotFound: when the answer's error code is 6
        :raises WSError: for any other error code
        """
        if 'error' in ans:
            code = ans.get('error')
            mess = ans.get('message')
            if code == 6:
                raise WSNotFound('{0}: "{1}"'.format(mess, self.artist))
            raise WSError(mess)
        return True

    def _forge_payload(self, artist, method='similar', track=None):
        """Build payload

        :param sima.lib.meta.Artist artist: artist the request is about
        :param str method: one of ``similar``, ``top``, ``track``, ``info``
        :param track: track title, only sent when `method` is ``track``
        :returns: request parameters as a list of 2-tuples sorted by name
        :raises TypeError: when `artist` is not an `Artist` instance
        :raises ValueError: when `method` is not a known alias
        """
        if not isinstance(artist, Artist):
            raise TypeError('"{0!r}" not an Artist object'.format(artist))
        if method not in self._METHODS:
            # Fail explicitly instead of an AttributeError on None.
            raise ValueError('Unknown method "{0}", expected one of: '
                             '{1}'.format(method,
                                          ', '.join(sorted(self._METHODS))))
        self.artist = artist
        # NOTE(review): Last.fm documents `limit` for the number of results;
        # confirm that `results` is actually honoured by the service.
        payload = {'method': self._METHODS[method],
                   'api_key': LFM.get('apikey'),
                   'format': 'json',
                   'results': 100,
                   }
        if artist.mbid:
            # Prefer the MusicBrainz ID over the artist name when available.
            payload.update(mbid='{0}'.format(artist.mbid))
        else:
            payload.update(artist=artist.name,
                           autocorrect=1)
        if method == 'track':
            payload.update(track=track)
        # > hashing the URL into a cache key
        # return a sorted list of 2-tuple to have consistent cache
        return sorted(payload.items(), key=lambda param: param[0])

    def _fetch(self, payload):
        """Send `payload` to the web service and return the decoded answer.

        The answer is parsed once and checked with `_controls_answer`.

        :param list payload: parameters as returned by `_forge_payload`
        :returns: decoded JSON answer
        :raises WSError: when the body is not valid JSON, or on service error
        :raises WSNotFound: when the service reports error code 6
        """
        ans = self.http(self.root_url, payload)
        try:
            data = ans.json()  # pylint: disable=no-member
        except ValueError as err:
            # Corrupted/malformed cache? cf. gitlab issue #35
            raise WSError('Malformed json, try purging the cache: '
                          '{0}'.format(err)) from err
        self._controls_answer(data)
        return data

    def get_similar(self, artist):
        """Fetch similar artists

        :param sima.lib.meta.Artist artist: `Artist` to fetch similar artists from
        :returns: generator of :class:`sima.lib.meta.Artist`
        :raises WSError: on malformed answer or when no similarity list is
            returned
        :raises WSNotFound: when the service reports error code 6
        """
        data = self._fetch(self._forge_payload(artist))
        # Artist might be found but return no 'artist' list…
        # cf. "Mulatu Astatqe" vs. "Mulatu Astatqé" with autocorrect=0
        # json format is broken IMHO, xml is more consistent IIRC
        # Here what we got:
        # >>> {"similarartists":{"#text":"\n","artist":"Mulatu Astatqe"}}
        # autocorrect=1 should fix it, checking anyway.
        simarts = (data.get('similarartists') or {}).get('artist')
        if not isinstance(simarts, list):
            raise WSError('Artist found but no similarities returned')
        for art in simarts:
            yield Artist(name=art.get('name'), mbid=art.get('mbid', None))

    def get_toptrack(self, artist):
        """Fetch artist top tracks

        :param sima.lib.meta.Artist artist: `Artist` to fetch top tracks from
        :returns: generator of :class:`sima.lib.track.Track`
        :raises WSError: on malformed answer or when no track list is returned
        :raises WSNotFound: when the service reports error code 6
        """
        data = self._fetch(self._forge_payload(artist, method='top'))
        tops = (data.get('toptracks') or {}).get('track')
        if not isinstance(tops, list):
            # Same guard as get_similar: report a web service error rather
            # than crashing while iterating an unexpected value.
            raise WSError('Artist found but no top tracks returned')
        art = {'artist': artist.name,
               'musicbrainz_artistid': artist.mbid,}
        for song in tops:
            # Work on a filtered copy so the decoded answer is not mutated.
            track = {key: val for key, val in song.items()
                     if key not in self._IGNORED_TRACK_KEYS}
            track.update(art)
            track.update(title=track.pop('name'))
            track.update(time=track.pop('duration', 0))
            yield Track(**track)

# VIM MODLINE
# vim: ai ts=4 sw=4 sts=4 expandtab