Add unit test as well.
* Add some randomness to track selection
* Do not queue artist already queued (regression)
+ * Some refactoring in http client
* …
-- kaliko jack <kaliko@azylum.org> UNRELEASED
import email.utils
+from requests import Session, Request, Timeout, ConnectionError
+
+from sima import SOCKET_TIMEOUT, WAIT_BETWEEN_REQUESTS
+from sima.utils.utils import WSError, WSTimeout, WSHTTPError, Throttle
from .cache import DictCache
retval = dict(parts_with_args + parts_wo_args)
return retval
- def cached_request(self, url, headers):
+ def cached_request(self, request):
"""Return the cached resquest if available and fresh
"""
- cache_url = self.cache_url(url)
- cc = self.parse_cache_control(headers)
+ cache_url = self.cache_url(request.url)
+ cc = self.parse_cache_control(request.headers)
# non-caching states
no_cache = True if 'no-cache' in cc else False
for header in varied_headers:
# If our headers don't match for the headers listed in
# the vary header, then don't use the cached response
- if headers.get(header, None) != original_headers.get(header):
+ if request.headers.get(header, None) != original_headers.get(header):
return False
now = time.time()
self.cache.delete(cache_url)
if 'etag' in resp.headers:
- headers['If-None-Match'] = resp.headers['ETag']
+ request.headers['If-None-Match'] = resp.headers['ETag']
if 'last-modified' in resp.headers:
- headers['If-Modified-Since'] = resp.headers['Last-Modified']
+ request.headers['If-Modified-Since'] = resp.headers['Last-Modified']
# return the original handler
return False
resp.from_cache = True
return resp
+
+
+class HttpClient:
+ def __init__(self, cache=None, stats=None):
+ """
+ Prepare http request
+ Use cached elements or proceed http request
+ """
+ self.stats = stats
+ self.controller = CacheController(cache)
+
+ def __call__(self, ress, payload):
+ req = Request('GET', ress, params=payload,).prepare()
+ if self.stats:
+ self.stats.update(total=self.stats.get('total')+1)
+ cached_response = self.controller.cached_request(req)
+ if cached_response:
+ if self.stats:
+ self.stats.update(ccontrol=self.stats.get('ccontrol')+1)
+ return cached_response
+ try:
+ return self.fetch_ws(req)
+ except Timeout:
+ raise WSTimeout('Failed to reach server within {0}s'.format(
+ SOCKET_TIMEOUT))
+ except ConnectionError as err:
+ raise WSError(err)
+
+ @Throttle(WAIT_BETWEEN_REQUESTS)
+ def fetch_ws(self, prepreq):
+ """fetch from web service"""
+ sess = Session()
+ resp = sess.send(prepreq, timeout=SOCKET_TIMEOUT)
+ if resp.status_code == 304:
+ self.stats.update(etag=self.stats.get('etag')+1)
+ resp = self.controller.update_cached_response(prepreq, resp)
+ elif resp.status_code != 200:
+ raise WSHTTPError('{0.status_code}: {0.reason}'.format(resp))
+ ratelimit = resp.headers.get('x-ratelimit-remaining', None)
+ if ratelimit and self.stats:
+ minrl = min(int(ratelimit), self.stats.get('minrl'))
+ self.stats.update(minrl=minrl)
+ self.controller.cache_response(resp.request, resp)
+ return resp
+
+# VIM MODLINE
+# vim: ai ts=4 sw=4 sts=4 expandtab
Consume EchoNest web service
"""
-__version__ = '0.0.2'
+__version__ = '0.0.3'
__author__ = 'Jack Kaliko'
-from requests import Session, Request, Timeout, ConnectionError
-from sima import ECH, SOCKET_TIMEOUT, WAIT_BETWEEN_REQUESTS
+from sima import ECH
from sima.lib.meta import Artist
from sima.lib.track import Track
-from sima.lib.http import CacheController
-from sima.utils.utils import WSError, WSNotFound, WSTimeout, WSHTTPError
-from sima.utils.utils import getws, Throttle
+from sima.lib.http import HttpClient
+from sima.utils.utils import WSError, WSNotFound
+from sima.utils.utils import getws
if len(ECH.get('apikey')) == 23: # simple hack allowing imp.reload
getws(ECH)
name = 'EchoNest'
cache = False
stats = {'etag':0,
- 'ccontrol':0,
- 'minrl':120,
- 'total':0}
+ 'ccontrol':0,
+ 'minrl':120,
+ 'total':0}
def __init__(self):
- self.controller = CacheController(self.cache)
-
- def _fetch(self, ressource, payload):
- """
- Prepare http request
- Use cached elements or proceed http request
- """
- req = Request('GET', ressource, params=payload,
- ).prepare()
- SimaEch.stats.update(total=SimaEch.stats.get('total')+1)
- if self.cache:
- cached_response = self.controller.cached_request(req.url, req.headers)
- if cached_response:
- SimaEch.stats.update(ccontrol=SimaEch.stats.get('ccontrol')+1)
- return cached_response.json()
- try:
- return self._fetch_ws(req)
- except Timeout:
- raise WSTimeout('Failed to reach server within {0}s'.format(
- SOCKET_TIMEOUT))
- except ConnectionError as err:
- raise WSError(err)
-
- @Throttle(WAIT_BETWEEN_REQUESTS)
- def _fetch_ws(self, prepreq):
- """fetch from web service"""
- sess = Session()
- resp = sess.send(prepreq, timeout=SOCKET_TIMEOUT)
- if resp.status_code == 304:
- SimaEch.stats.update(etag=SimaEch.stats.get('etag')+1)
- resp = self.controller.update_cached_response(prepreq, resp)
- elif resp.status_code != 200:
- raise WSHTTPError('{0.status_code}: {0.reason}'.format(resp))
- ans = resp.json()
- self._controls_answer(ans)
- SimaEch.ratelimit = resp.headers.get('x-ratelimit-remaining', None)
- minrl = min(int(SimaEch.ratelimit), SimaEch.stats.get('minrl'))
- SimaEch.stats.update(minrl=minrl)
- if self.cache:
- self.controller.cache_response(resp.request, resp)
- return ans
+ self.http = HttpClient(cache=self.cache, stats=self.stats)
def _controls_answer(self, ans):
"""Controls answer.
payload = self._forge_payload(artist)
# Construct URL
ressource = '{0}/artist/similar'.format(SimaEch.root_url)
- ans = self._fetch(ressource, payload)
- for art in ans.get('response').get('artists'):
+ ans = self.http(ressource, payload)
+ self._controls_answer(ans.json())
+ for art in ans.json().get('response').get('artists'):
mbid = None
if 'foreign_ids' in art:
for frgnid in art.get('foreign_ids'):
payload = self._forge_payload(artist, top=True)
# Construct URL
ressource = '{0}/song/search'.format(SimaEch.root_url)
- ans = self._fetch(ressource, payload)
+ ans = self.http(ressource, payload)
+ self._controls_answer(ans.json())
titles = list()
art = {
'artist': artist.name,
'musicbrainz_artistid': artist.mbid,
}
- for song in ans.get('response').get('songs'):
+ for song in ans.json().get('response').get('songs'):
title = song.get('title')
if title not in titles:
titles.append(title)
-from requests import Session, Request, Timeout, ConnectionError
-
-from sima import LFM, SOCKET_TIMEOUT, WAIT_BETWEEN_REQUESTS
+from sima import LFM
from sima.lib.meta import Artist
from sima.lib.track import Track
-from sima.lib.http import CacheController
-from sima.utils.utils import WSError, WSNotFound, WSTimeout, WSHTTPError
-from sima.utils.utils import getws, Throttle
+from sima.lib.http import HttpClient
+from sima.utils.utils import WSError, WSNotFound
+from sima.utils.utils import getws
if len(LFM.get('apikey')) == 43: # simple hack allowing imp.reload
getws(LFM)
'total':0}
def __init__(self):
- self.controller = CacheController(self.cache)
+ self.http = HttpClient(cache=self.cache, stats=self.stats)
self.artist = None
- def _fetch(self, payload):
- """
- Prepare http request
- Use cached elements or proceed http request
- """
- req = Request('GET', SimaFM.root_url, params=payload,
- ).prepare()
- SimaFM.stats.update(total=SimaFM.stats.get('total')+1)
- if self.cache:
- cached_response = self.controller.cached_request(req.url, req.headers)
- if cached_response:
- SimaFM.stats.update(ccontrol=SimaFM.stats.get('ccontrol')+1)
- return cached_response.json()
- try:
- return self._fetch_ws(req)
- except Timeout:
- raise WSTimeout('Failed to reach server within {0}s'.format(
- SOCKET_TIMEOUT))
- except ConnectionError as err:
- raise WSError(err)
-
- @Throttle(WAIT_BETWEEN_REQUESTS)
- def _fetch_ws(self, prepreq):
- """fetch from web service"""
- sess = Session()
- resp = sess.send(prepreq, timeout=SOCKET_TIMEOUT)
- if resp.status_code == 304:
- SimaFM.stats.update(etag=SimaFM.stats.get('etag')+1)
- resp = self.controller.update_cached_response(prepreq, resp)
- elif resp.status_code != 200:
- raise WSHTTPError('{0.status_code}: {0.reason}'.format(resp))
- ans = resp.json()
- self._controls_answer(ans)
- if self.cache:
- self.controller.cache_response(resp.request, resp)
- return ans
-
def _controls_answer(self, ans):
"""Controls answer.
"""
"""
payload = self._forge_payload(artist)
# Construct URL
- ans = self._fetch(payload)
+ ans = self.http(self.root_url, payload)
+ self._controls_answer(ans.json())
# Artist might be found be return no 'artist' list…
# cf. "Mulatu Astatqe" vs. "Mulatu Astatqé" with autocorrect=0
# json format is broken IMHO, xml is more consistent IIRC
# Here what we got:
# >>> {"similarartists":{"#text":"\n","artist":"Mulatu Astatqe"}}
# autocorrect=1 should fix it, checking anyway.
- simarts = ans.get('similarartists').get('artist')
+ simarts = ans.json().get('similarartists').get('artist')
if not isinstance(simarts, list):
raise WSError('Artist found but no similarities returned')
- for art in ans.get('similarartists').get('artist'):
+ for art in ans.json().get('similarartists').get('artist'):
yield Artist(name=art.get('name'), mbid=art.get('mbid', None))
def get_toptrack(self, artist=None):
"""Fetch artist top tracks
"""
payload = self._forge_payload(artist, method='top')
- ans = self._fetch(payload)
- tops = ans.get('toptracks').get('track')
+ ans = self.http(self.root_url, payload)
+ self._controls_answer(ans.json())
+ tops = ans.json().get('toptracks').get('track')
art = {
'artist': artist.name,
'musicbrainz_artistid': artist.mbid,
--- /dev/null
+# -*- coding: utf-8 -*-
+
+import unittest
+import time
+
+from unittest.mock import Mock
+
+from sima.lib.cache import DictCache
+from sima.lib.http import CacheController
+
+TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT"
+
+
+class TestCacheControlRequest(unittest.TestCase):
+ url = 'http://foo.com/bar'
+
+ def setUp(self):
+ self.c = CacheController(
+ DictCache(),
+ )
+
+ def req(self, headers):
+ return self.c.cached_request(Mock(url=self.url, headers=headers))
+
+ def test_cache_request_no_cache(self):
+ resp = self.req({'cache-control': 'no-cache'})
+ assert not resp
+
+ def test_cache_request_pragma_no_cache(self):
+ resp = self.req({'pragma': 'no-cache'})
+ assert not resp
+
+ def test_cache_request_no_store(self):
+ resp = self.req({'cache-control': 'no-store'})
+ assert not resp
+
+ def test_cache_request_max_age_0(self):
+ resp = self.req({'cache-control': 'max-age=0'})
+ assert not resp
+
+ def test_cache_request_not_in_cache(self):
+ resp = self.req({})
+ assert not resp
+
+ def test_cache_request_fresh_max_age(self):
+ now = time.strftime(TIME_FMT, time.gmtime())
+ resp = Mock(headers={'cache-control': 'max-age=3600',
+ 'date': now})
+
+ cache = DictCache({self.url: resp})
+ self.c.cache = cache
+ r = self.req({})
+ assert r == resp
+
+ def test_cache_request_unfresh_max_age(self):
+ earlier = time.time() - 3700 # epoch - 1h01m40s
+ now = time.strftime(TIME_FMT, time.gmtime(earlier))
+ resp = Mock(headers={'cache-control': 'max-age=3600',
+ 'date': now})
+ self.c.cache = DictCache({self.url: resp})
+ r = self.req({})
+ assert not r
+
+ def test_cache_request_fresh_expires(self):
+ later = time.time() + 86400 # GMT + 1 day
+ expires = time.strftime(TIME_FMT, time.gmtime(later))
+ now = time.strftime(TIME_FMT, time.gmtime())
+ resp = Mock(headers={'expires': expires,
+ 'date': now})
+ cache = DictCache({self.url: resp})
+ self.c.cache = cache
+ r = self.req({})
+ assert r == resp
+
+ def test_cache_request_unfresh_expires(self):
+ sooner = time.time() - 86400 # GMT - 1 day
+ expires = time.strftime(TIME_FMT, time.gmtime(sooner))
+ now = time.strftime(TIME_FMT, time.gmtime())
+ resp = Mock(headers={'expires': expires,
+ 'date': now})
+ cache = DictCache({self.url: resp})
+ self.c.cache = cache
+ r = self.req({})
+ assert not r
+
+# VIM MODLINE
+# vim: ai ts=4 sw=4 sts=4 expandtab
+