From 7d24f8a6f0bb74de07c3e6d333a2948d9186be1b Mon Sep 17 00:00:00 2001 From: kaliko Date: Sun, 29 Jun 2014 21:34:10 +0200 Subject: [PATCH] http client/cache controller refactoring Add unit test as well. --- doc/Changelog | 1 + sima/lib/http.py | 63 ++++++++++++++++++++++++++++--- sima/lib/simaecho.py | 69 ++++++++-------------------------- sima/lib/simafm.py | 61 ++++++------------------------ tests/test_http.py | 88 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 173 insertions(+), 109 deletions(-) create mode 100644 tests/test_http.py diff --git a/doc/Changelog b/doc/Changelog index 0920048..ba9e9b0 100644 --- a/doc/Changelog +++ b/doc/Changelog @@ -2,6 +2,7 @@ MPD_sima v0.12.2 * Add some randomness to track selection * Do not queue artist already queued (regression) + * Some refactoring in http client * … -- kaliko jack UNRELEASED diff --git a/sima/lib/http.py b/sima/lib/http.py index c07ad38..0c1b396 100644 --- a/sima/lib/http.py +++ b/sima/lib/http.py @@ -26,6 +26,10 @@ import time import email.utils +from requests import Session, Request, Timeout, ConnectionError + +from sima import SOCKET_TIMEOUT, WAIT_BETWEEN_REQUESTS +from sima.utils.utils import WSError, WSTimeout, WSHTTPError, Throttle from .cache import DictCache @@ -88,11 +92,11 @@ class CacheController(object): retval = dict(parts_with_args + parts_wo_args) return retval - def cached_request(self, url, headers): + def cached_request(self, request): """Return the cached resquest if available and fresh """ - cache_url = self.cache_url(url) - cc = self.parse_cache_control(headers) + cache_url = self.cache_url(request.url) + cc = self.parse_cache_control(request.headers) # non-caching states no_cache = True if 'no-cache' in cc else False @@ -125,7 +129,7 @@ class CacheController(object): for header in varied_headers: # If our headers don't match for the headers listed in # the vary header, then don't use the cached response - if headers.get(header, None) != original_headers.get(header): + if request.headers.get(header, None) != original_headers.get(header): return False now = time.time() @@ -178,10 +182,10 @@ class CacheController(object): self.cache.delete(cache_url) if 'etag' in resp.headers: - headers['If-None-Match'] = resp.headers['ETag'] + request.headers['If-None-Match'] = resp.headers['ETag'] if 'last-modified' in resp.headers: - headers['If-Modified-Since'] = resp.headers['Last-Modified'] + request.headers['If-Modified-Since'] = resp.headers['Last-Modified'] # return the original handler return False @@ -265,3 +269,50 @@ class CacheController(object): resp.from_cache = True return resp + + +class HttpClient: + def __init__(self, cache=None, stats=None): + """ + Prepare http request + Use cached elements or proceed http request + """ + self.stats = stats + self.controller = CacheController(cache) + + def __call__(self, ress, payload): + req = Request('GET', ress, params=payload,).prepare() + if self.stats: + self.stats.update(total=self.stats.get('total')+1) + cached_response = self.controller.cached_request(req) + if cached_response: + if self.stats: + self.stats.update(ccontrol=self.stats.get('ccontrol')+1) + return cached_response + try: + return self.fetch_ws(req) + except Timeout: + raise WSTimeout('Failed to reach server within {0}s'.format( + SOCKET_TIMEOUT)) + except ConnectionError as err: + raise WSError(err) + + @Throttle(WAIT_BETWEEN_REQUESTS) + def fetch_ws(self, prepreq): + """fetch from web service""" + sess = Session() + resp = sess.send(prepreq, timeout=SOCKET_TIMEOUT) + if resp.status_code == 304: + self.stats.update(etag=self.stats.get('etag')+1) + resp = self.controller.update_cached_response(prepreq, resp) + elif resp.status_code != 200: + raise WSHTTPError('{0.status_code}: {0.reason}'.format(resp)) + ratelimit = resp.headers.get('x-ratelimit-remaining', None) + if ratelimit and self.stats: + minrl = min(int(ratelimit), self.stats.get('minrl')) + self.stats.update(minrl=minrl) + self.controller.cache_response(resp.request, resp) + return resp + +# VIM MODLINE +# vim: ai ts=4 sw=4 sts=4 expandtab diff --git a/sima/lib/simaecho.py b/sima/lib/simaecho.py index e5fe485..3f649a6 100644 --- a/sima/lib/simaecho.py +++ b/sima/lib/simaecho.py @@ -21,18 +21,17 @@ Consume EchoNest web service """ -__version__ = '0.0.2' +__version__ = '0.0.3' __author__ = 'Jack Kaliko' -from requests import Session, Request, Timeout, ConnectionError -from sima import ECH, SOCKET_TIMEOUT, WAIT_BETWEEN_REQUESTS +from sima import ECH from sima.lib.meta import Artist from sima.lib.track import Track -from sima.lib.http import CacheController -from sima.utils.utils import WSError, WSNotFound, WSTimeout, WSHTTPError -from sima.utils.utils import getws, Throttle +from sima.lib.http import HttpClient +from sima.utils.utils import WSError, WSNotFound +from sima.utils.utils import getws if len(ECH.get('apikey')) == 23: # simple hack allowing imp.reload getws(ECH) @@ -45,52 +44,12 @@ class SimaEch: name = 'EchoNest' cache = False stats = {'etag':0, - 'ccontrol':0, - 'minrl':120, - 'total':0} + 'ccontrol':0, + 'minrl':120, + 'total':0} def __init__(self): - self.controller = CacheController(self.cache) - - def _fetch(self, ressource, payload): - """ - Prepare http request - Use cached elements or proceed http request - """ - req = Request('GET', ressource, params=payload, - ).prepare() - SimaEch.stats.update(total=SimaEch.stats.get('total')+1) - if self.cache: - cached_response = self.controller.cached_request(req.url, req.headers) - if cached_response: - SimaEch.stats.update(ccontrol=SimaEch.stats.get('ccontrol')+1) - return cached_response.json() - try: - return self._fetch_ws(req) - except Timeout: - raise WSTimeout('Failed to reach server within {0}s'.format( - SOCKET_TIMEOUT)) - except ConnectionError as err: - raise WSError(err) - - @Throttle(WAIT_BETWEEN_REQUESTS) - def _fetch_ws(self, prepreq): - """fetch from web service""" - sess = Session() - resp = sess.send(prepreq, timeout=SOCKET_TIMEOUT) - if resp.status_code == 304: - SimaEch.stats.update(etag=SimaEch.stats.get('etag')+1) - resp = self.controller.update_cached_response(prepreq, resp) - elif resp.status_code != 200: - raise WSHTTPError('{0.status_code}: {0.reason}'.format(resp)) - ans = resp.json() - self._controls_answer(ans) - SimaEch.ratelimit = resp.headers.get('x-ratelimit-remaining', None) - minrl = min(int(SimaEch.ratelimit), SimaEch.stats.get('minrl')) - SimaEch.stats.update(minrl=minrl) - if self.cache: - self.controller.cache_response(resp.request, resp) - return ans + self.http = HttpClient(cache=self.cache, stats=self.stats) def _controls_answer(self, ans): """Controls answer. @@ -135,8 +94,9 @@ class SimaEch: payload = self._forge_payload(artist) # Construct URL ressource = '{0}/artist/similar'.format(SimaEch.root_url) - ans = self._fetch(ressource, payload) - for art in ans.get('response').get('artists'): + ans = self.http(ressource, payload) + self._controls_answer(ans.json()) + for art in ans.json().get('response').get('artists'): mbid = None if 'foreign_ids' in art: for frgnid in art.get('foreign_ids'): @@ -151,13 +111,14 @@ class SimaEch: payload = self._forge_payload(artist, top=True) # Construct URL ressource = '{0}/song/search'.format(SimaEch.root_url) - ans = self._fetch(ressource, payload) + ans = self.http(ressource, payload) + self._controls_answer(ans.json()) titles = list() art = { 'artist': artist.name, 'musicbrainz_artistid': artist.mbid, } - for song in ans.get('response').get('songs'): + for song in ans.json().get('response').get('songs'): title = song.get('title') if title not in titles: titles.append(title) diff --git a/sima/lib/simafm.py b/sima/lib/simafm.py index 0988266..236efe7 100644 --- a/sima/lib/simafm.py +++ b/sima/lib/simafm.py @@ -26,15 +26,13 @@ __author__ = 'Jack Kaliko' -from requests import Session, Request, Timeout, ConnectionError - -from sima import LFM, SOCKET_TIMEOUT, WAIT_BETWEEN_REQUESTS +from sima import LFM from sima.lib.meta import Artist from sima.lib.track import Track -from sima.lib.http import CacheController -from sima.utils.utils import WSError, WSNotFound, WSTimeout, WSHTTPError -from sima.utils.utils import getws, Throttle +from sima.lib.http import HttpClient +from sima.utils.utils import WSError, WSNotFound +from sima.utils.utils import getws if len(LFM.get('apikey')) == 43: # simple hack allowing imp.reload getws(LFM) @@ -51,46 +49,9 @@ class SimaFM: 'total':0} def __init__(self): - self.controller = CacheController(self.cache) + self.http = HttpClient(cache=self.cache, stats=self.stats) self.artist = None - def _fetch(self, payload): - """ - Prepare http request - Use cached elements or proceed http request - """ - req = Request('GET', SimaFM.root_url, params=payload, - ).prepare() - SimaFM.stats.update(total=SimaFM.stats.get('total')+1) - if self.cache: - cached_response = self.controller.cached_request(req.url, req.headers) - if cached_response: - SimaFM.stats.update(ccontrol=SimaFM.stats.get('ccontrol')+1) - return cached_response.json() - try: - return self._fetch_ws(req) - except Timeout: - raise WSTimeout('Failed to reach server within {0}s'.format( - SOCKET_TIMEOUT)) - except ConnectionError as err: - raise WSError(err) - - @Throttle(WAIT_BETWEEN_REQUESTS) - def _fetch_ws(self, prepreq): - """fetch from web service""" - sess = Session() - resp = sess.send(prepreq, timeout=SOCKET_TIMEOUT) - if resp.status_code == 304: - SimaFM.stats.update(etag=SimaFM.stats.get('etag')+1) - resp = self.controller.update_cached_response(prepreq, resp) - elif resp.status_code != 200: - raise WSHTTPError('{0.status_code}: {0.reason}'.format(resp)) - ans = resp.json() - self._controls_answer(ans) - if self.cache: - self.controller.cache_response(resp.request, resp) - return ans - def _controls_answer(self, ans): """Controls answer. """ @@ -132,25 +93,27 @@ class SimaFM: """ payload = self._forge_payload(artist) # Construct URL - ans = self._fetch(payload) + ans = self.http(self.root_url, payload) + self._controls_answer(ans.json()) # Artist might be found be return no 'artist' list… # cf. "Mulatu Astatqe" vs. "Mulatu Astatqé" with autocorrect=0 # json format is broken IMHO, xml is more consistent IIRC # Here what we got: # >>> {"similarartists":{"#text":"\n","artist":"Mulatu Astatqe"}} # autocorrect=1 should fix it, checking anyway. - simarts = ans.get('similarartists').get('artist') + simarts = ans.json().get('similarartists').get('artist') if not isinstance(simarts, list): raise WSError('Artist found but no similarities returned') - for art in ans.get('similarartists').get('artist'): + for art in ans.json().get('similarartists').get('artist'): yield Artist(name=art.get('name'), mbid=art.get('mbid', None)) def get_toptrack(self, artist=None): """Fetch artist top tracks """ payload = self._forge_payload(artist, method='top') - ans = self._fetch(payload) - tops = ans.get('toptracks').get('track') + ans = self.http(self.root_url, payload) + self._controls_answer(ans.json()) + tops = ans.json().get('toptracks').get('track') art = { 'artist': artist.name, 'musicbrainz_artistid': artist.mbid, diff --git a/tests/test_http.py b/tests/test_http.py new file mode 100644 index 0000000..d6c4388 --- /dev/null +++ b/tests/test_http.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- + +import unittest +import time + +from unittest.mock import Mock + +from sima.lib.cache import DictCache +from sima.lib.http import CacheController + +TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT" + + +class TestCacheControlRequest(unittest.TestCase): + url = 'http://foo.com/bar' + + def setUp(self): + self.c = CacheController( + DictCache(), + ) + + def req(self, headers): + return self.c.cached_request(Mock(url=self.url, headers=headers)) + + def test_cache_request_no_cache(self): + resp = self.req({'cache-control': 'no-cache'}) + assert not resp + + def test_cache_request_pragma_no_cache(self): + resp = self.req({'pragma': 'no-cache'}) + assert not resp + + def test_cache_request_no_store(self): + resp = self.req({'cache-control': 'no-store'}) + assert not resp + + def test_cache_request_max_age_0(self): + resp = self.req({'cache-control': 'max-age=0'}) + assert not resp + + def test_cache_request_not_in_cache(self): + resp = self.req({}) + assert not resp + + def test_cache_request_fresh_max_age(self): + now = time.strftime(TIME_FMT, time.gmtime()) + resp = Mock(headers={'cache-control': 'max-age=3600', + 'date': now}) + + cache = DictCache({self.url: resp}) + self.c.cache = cache + r = self.req({}) + assert r == resp + + def test_cache_request_unfresh_max_age(self): + earlier = time.time() - 3700 # epoch - 1h01m40s + now = time.strftime(TIME_FMT, time.gmtime(earlier)) + resp = Mock(headers={'cache-control': 'max-age=3600', + 'date': now}) + self.c.cache = DictCache({self.url: resp}) + r = self.req({}) + assert not r + + def test_cache_request_fresh_expires(self): + later = time.time() + 86400 # GMT + 1 day + expires = time.strftime(TIME_FMT, time.gmtime(later)) + now = time.strftime(TIME_FMT, time.gmtime()) + resp = Mock(headers={'expires': expires, + 'date': now}) + cache = DictCache({self.url: resp}) + self.c.cache = cache + r = self.req({}) + assert r == resp + + def test_cache_request_unfresh_expires(self): + sooner = time.time() - 86400 # GMT - 1 day + expires = time.strftime(TIME_FMT, time.gmtime(sooner)) + now = time.strftime(TIME_FMT, time.gmtime()) + resp = Mock(headers={'expires': expires, + 'date': now}) + cache = DictCache({self.url: resp}) + self.c.cache = cache + r = self.req({}) + assert not r + +# VIM MODLINE +# vim: ai ts=4 sw=4 sts=4 expandtab + -- 2.39.2