]> kaliko git repositories - mpd-sima.git/commitdiff
http client/cache controller refactoring
authorkaliko <kaliko@azylum.org>
Sun, 29 Jun 2014 19:34:10 +0000 (21:34 +0200)
committerkaliko <kaliko@azylum.org>
Sun, 29 Jun 2014 19:37:29 +0000 (21:37 +0200)
Add unit test as well.

doc/Changelog
sima/lib/http.py
sima/lib/simaecho.py
sima/lib/simafm.py
tests/test_http.py [new file with mode: 0644]

index 092004837d438b1035c183fa980b6cbf2f2aff20..ba9e9b0473ef5b5ef87bcc8a68743b9773520062 100644 (file)
@@ -2,6 +2,7 @@ MPD_sima v0.12.2
 
  * Add some randomness to track selection
  * Do not queue artist already queued (regression)
+ * Some refactoring in http client
  * …
 
 -- kaliko jack <kaliko@azylum.org>  UNRELEASED
index c07ad38e7bd96576a1fe2c972653d6c157e74c1e..0c1b3968d90ce31bfcb13fbadd4c948c22d2289a 100644 (file)
@@ -26,6 +26,10 @@ import time
 
 import email.utils
 
+from requests import Session, Request, Timeout, ConnectionError
+
+from sima import SOCKET_TIMEOUT, WAIT_BETWEEN_REQUESTS
+from sima.utils.utils import WSError, WSTimeout, WSHTTPError, Throttle
 from .cache import DictCache
 
 
@@ -88,11 +92,11 @@ class CacheController(object):
             retval = dict(parts_with_args + parts_wo_args)
         return retval
 
-    def cached_request(self, url, headers):
+    def cached_request(self, request):
         """Return the cached resquest if available and fresh
         """
-        cache_url = self.cache_url(url)
-        cc = self.parse_cache_control(headers)
+        cache_url = self.cache_url(request.url)
+        cc = self.parse_cache_control(request.headers)
 
         # non-caching states
         no_cache = True if 'no-cache' in cc else False
@@ -125,7 +129,7 @@ class CacheController(object):
             for header in varied_headers:
                 # If our headers don't match for the headers listed in
                 # the vary header, then don't use the cached response
-                if headers.get(header, None) != original_headers.get(header):
+                if request.headers.get(header, None) != original_headers.get(header):
                     return False
 
         now = time.time()
@@ -178,10 +182,10 @@ class CacheController(object):
             self.cache.delete(cache_url)
 
         if 'etag' in resp.headers:
-            headers['If-None-Match'] = resp.headers['ETag']
+            request.headers['If-None-Match'] = resp.headers['ETag']
 
         if 'last-modified' in resp.headers:
-            headers['If-Modified-Since'] = resp.headers['Last-Modified']
+            request.headers['If-Modified-Since'] = resp.headers['Last-Modified']
 
         # return the original handler
         return False
@@ -265,3 +269,50 @@ class CacheController(object):
         resp.from_cache = True
 
         return resp
+
+
+class HttpClient:
+    def __init__(self, cache=None, stats=None):
+        """
+        Prepare http request
+        Use cached elements or proceed http request
+        """
+        self.stats = stats
+        self.controller = CacheController(cache)
+
+    def __call__(self, ress, payload):
+        req = Request('GET', ress, params=payload,).prepare()
+        if self.stats:
+            self.stats.update(total=self.stats.get('total')+1)
+        cached_response = self.controller.cached_request(req)
+        if cached_response:
+            if self.stats:
+                self.stats.update(ccontrol=self.stats.get('ccontrol')+1)
+            return cached_response
+        try:
+            return self.fetch_ws(req)
+        except Timeout:
+            raise WSTimeout('Failed to reach server within {0}s'.format(
+                SOCKET_TIMEOUT))
+        except ConnectionError as err:
+            raise WSError(err)
+
+    @Throttle(WAIT_BETWEEN_REQUESTS)
+    def fetch_ws(self, prepreq):
+        """fetch from web service"""
+        sess = Session()
+        resp = sess.send(prepreq, timeout=SOCKET_TIMEOUT)
+        if resp.status_code == 304:
+            self.stats.update(etag=self.stats.get('etag')+1)
+            resp = self.controller.update_cached_response(prepreq, resp)
+        elif resp.status_code != 200:
+            raise WSHTTPError('{0.status_code}: {0.reason}'.format(resp))
+        ratelimit = resp.headers.get('x-ratelimit-remaining', None)
+        if ratelimit and self.stats:
+            minrl = min(int(ratelimit), self.stats.get('minrl'))
+            self.stats.update(minrl=minrl)
+        self.controller.cache_response(resp.request, resp)
+        return resp
+
+# VIM MODLINE
+# vim: ai ts=4 sw=4 sts=4 expandtab
index e5fe4852566c41ca9fbc949f5c0351a21cf7f4ad..3f649a6575faf13849006403dd7e1493eb997641 100644 (file)
 Consume EchoNest web service
 """
 
-__version__ = '0.0.2'
+__version__ = '0.0.3'
 __author__ = 'Jack Kaliko'
 
 
-from requests import Session, Request, Timeout, ConnectionError
 
-from sima import ECH, SOCKET_TIMEOUT, WAIT_BETWEEN_REQUESTS
+from sima import ECH
 from sima.lib.meta import Artist
 from sima.lib.track import Track
-from sima.lib.http import CacheController
-from sima.utils.utils import WSError, WSNotFound, WSTimeout, WSHTTPError
-from sima.utils.utils import getws, Throttle
+from sima.lib.http import HttpClient
+from sima.utils.utils import WSError, WSNotFound
+from sima.utils.utils import getws
 if len(ECH.get('apikey')) == 23:  # simple hack allowing imp.reload
     getws(ECH)
 
@@ -45,52 +44,12 @@ class SimaEch:
     name = 'EchoNest'
     cache = False
     stats = {'etag':0,
-            'ccontrol':0,
-            'minrl':120,
-            'total':0}
+             'ccontrol':0,
+             'minrl':120,
+             'total':0}
 
     def __init__(self):
-        self.controller = CacheController(self.cache)
-
-    def _fetch(self, ressource, payload):
-        """
-        Prepare http request
-        Use cached elements or proceed http request
-        """
-        req = Request('GET', ressource, params=payload,
-                      ).prepare()
-        SimaEch.stats.update(total=SimaEch.stats.get('total')+1)
-        if self.cache:
-            cached_response = self.controller.cached_request(req.url, req.headers)
-            if cached_response:
-                SimaEch.stats.update(ccontrol=SimaEch.stats.get('ccontrol')+1)
-                return cached_response.json()
-        try:
-            return self._fetch_ws(req)
-        except Timeout:
-            raise WSTimeout('Failed to reach server within {0}s'.format(
-                               SOCKET_TIMEOUT))
-        except ConnectionError as err:
-            raise WSError(err)
-
-    @Throttle(WAIT_BETWEEN_REQUESTS)
-    def _fetch_ws(self, prepreq):
-        """fetch from web service"""
-        sess = Session()
-        resp = sess.send(prepreq, timeout=SOCKET_TIMEOUT)
-        if resp.status_code == 304:
-            SimaEch.stats.update(etag=SimaEch.stats.get('etag')+1)
-            resp = self.controller.update_cached_response(prepreq, resp)
-        elif resp.status_code != 200:
-            raise WSHTTPError('{0.status_code}: {0.reason}'.format(resp))
-        ans = resp.json()
-        self._controls_answer(ans)
-        SimaEch.ratelimit = resp.headers.get('x-ratelimit-remaining', None)
-        minrl = min(int(SimaEch.ratelimit), SimaEch.stats.get('minrl'))
-        SimaEch.stats.update(minrl=minrl)
-        if self.cache:
-            self.controller.cache_response(resp.request, resp)
-        return ans
+        self.http = HttpClient(cache=self.cache, stats=self.stats)
 
     def _controls_answer(self, ans):
         """Controls answer.
@@ -135,8 +94,9 @@ class SimaEch:
         payload = self._forge_payload(artist)
         # Construct URL
         ressource = '{0}/artist/similar'.format(SimaEch.root_url)
-        ans = self._fetch(ressource, payload)
-        for art in ans.get('response').get('artists'):
+        ans = self.http(ressource, payload)
+        self._controls_answer(ans.json())
+        for art in ans.json().get('response').get('artists'):
             mbid = None
             if 'foreign_ids' in art:
                 for frgnid in art.get('foreign_ids'):
@@ -151,13 +111,14 @@ class SimaEch:
         payload = self._forge_payload(artist, top=True)
         # Construct URL
         ressource = '{0}/song/search'.format(SimaEch.root_url)
-        ans = self._fetch(ressource, payload)
+        ans = self.http(ressource, payload)
+        self._controls_answer(ans.json())
         titles = list()
         art = {
                 'artist': artist.name,
                 'musicbrainz_artistid': artist.mbid,
                 }
-        for song in ans.get('response').get('songs'):
+        for song in ans.json().get('response').get('songs'):
             title = song.get('title')
             if title not in titles:
                 titles.append(title)
index 0988266e37c2f0f84121a781f6d8893b7abe5d68..236efe725cb9458dbf7f50531ffed66f0c92d156 100644 (file)
@@ -26,15 +26,13 @@ __author__ = 'Jack Kaliko'
 
 
 
-from requests import Session, Request, Timeout, ConnectionError
-
-from sima import LFM, SOCKET_TIMEOUT, WAIT_BETWEEN_REQUESTS
+from sima import LFM
 from sima.lib.meta import Artist
 from sima.lib.track import Track
 
-from sima.lib.http import CacheController
-from sima.utils.utils import WSError, WSNotFound, WSTimeout, WSHTTPError
-from sima.utils.utils import getws, Throttle
+from sima.lib.http import HttpClient
+from sima.utils.utils import WSError, WSNotFound
+from sima.utils.utils import getws
 if len(LFM.get('apikey')) == 43:  # simple hack allowing imp.reload
     getws(LFM)
 
@@ -51,46 +49,9 @@ class SimaFM:
             'total':0}
 
     def __init__(self):
-        self.controller = CacheController(self.cache)
+        self.http = HttpClient(cache=self.cache, stats=self.stats)
         self.artist = None
 
-    def _fetch(self, payload):
-        """
-        Prepare http request
-        Use cached elements or proceed http request
-        """
-        req = Request('GET', SimaFM.root_url, params=payload,
-                      ).prepare()
-        SimaFM.stats.update(total=SimaFM.stats.get('total')+1)
-        if self.cache:
-            cached_response = self.controller.cached_request(req.url, req.headers)
-            if cached_response:
-                SimaFM.stats.update(ccontrol=SimaFM.stats.get('ccontrol')+1)
-                return cached_response.json()
-        try:
-            return self._fetch_ws(req)
-        except Timeout:
-            raise WSTimeout('Failed to reach server within {0}s'.format(
-                               SOCKET_TIMEOUT))
-        except ConnectionError as err:
-            raise WSError(err)
-
-    @Throttle(WAIT_BETWEEN_REQUESTS)
-    def _fetch_ws(self, prepreq):
-        """fetch from web service"""
-        sess = Session()
-        resp = sess.send(prepreq, timeout=SOCKET_TIMEOUT)
-        if resp.status_code == 304:
-            SimaFM.stats.update(etag=SimaFM.stats.get('etag')+1)
-            resp = self.controller.update_cached_response(prepreq, resp)
-        elif resp.status_code != 200:
-            raise WSHTTPError('{0.status_code}: {0.reason}'.format(resp))
-        ans = resp.json()
-        self._controls_answer(ans)
-        if self.cache:
-            self.controller.cache_response(resp.request, resp)
-        return ans
-
     def _controls_answer(self, ans):
         """Controls answer.
         """
@@ -132,25 +93,27 @@ class SimaFM:
         """
         payload = self._forge_payload(artist)
         # Construct URL
-        ans = self._fetch(payload)
+        ans = self.http(self.root_url, payload)
+        self._controls_answer(ans.json())
         # Artist might be found be return no 'artist' list…
         # cf. "Mulatu Astatqe" vs. "Mulatu Astatqé" with autocorrect=0
         # json format is broken IMHO, xml is more consistent IIRC
         # Here what we got:
         # >>> {"similarartists":{"#text":"\n","artist":"Mulatu Astatqe"}}
         # autocorrect=1 should fix it, checking anyway.
-        simarts = ans.get('similarartists').get('artist')
+        simarts = ans.json().get('similarartists').get('artist')
         if not isinstance(simarts, list):
             raise WSError('Artist found but no similarities returned')
-        for art in ans.get('similarartists').get('artist'):
+        for art in ans.json().get('similarartists').get('artist'):
             yield Artist(name=art.get('name'), mbid=art.get('mbid', None))
 
     def get_toptrack(self, artist=None):
         """Fetch artist top tracks
         """
         payload = self._forge_payload(artist, method='top')
-        ans = self._fetch(payload)
-        tops = ans.get('toptracks').get('track')
+        ans = self.http(self.root_url, payload)
+        self._controls_answer(ans.json())
+        tops = ans.json().get('toptracks').get('track')
         art = {
                 'artist': artist.name,
                 'musicbrainz_artistid': artist.mbid,
diff --git a/tests/test_http.py b/tests/test_http.py
new file mode 100644 (file)
index 0000000..d6c4388
--- /dev/null
@@ -0,0 +1,88 @@
+# -*- coding: utf-8 -*-
+
+import unittest
+import time
+
+from unittest.mock import Mock
+
+from sima.lib.cache import DictCache
+from sima.lib.http import CacheController
+
+TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT"
+
+
+class TestCacheControlRequest(unittest.TestCase):
+    url = 'http://foo.com/bar'
+
+    def setUp(self):
+        self.c = CacheController(
+            DictCache(),
+        )
+
+    def req(self, headers):
+        return self.c.cached_request(Mock(url=self.url, headers=headers))
+
+    def test_cache_request_no_cache(self):
+        resp = self.req({'cache-control': 'no-cache'})
+        assert not resp
+
+    def test_cache_request_pragma_no_cache(self):
+        resp = self.req({'pragma': 'no-cache'})
+        assert not resp
+
+    def test_cache_request_no_store(self):
+        resp = self.req({'cache-control': 'no-store'})
+        assert not resp
+
+    def test_cache_request_max_age_0(self):
+        resp = self.req({'cache-control': 'max-age=0'})
+        assert not resp
+
+    def test_cache_request_not_in_cache(self):
+        resp = self.req({})
+        assert not resp
+
+    def test_cache_request_fresh_max_age(self):
+        now = time.strftime(TIME_FMT, time.gmtime())
+        resp = Mock(headers={'cache-control': 'max-age=3600',
+                             'date': now})
+
+        cache = DictCache({self.url: resp})
+        self.c.cache = cache
+        r = self.req({})
+        assert r == resp
+
+    def test_cache_request_unfresh_max_age(self):
+        earlier = time.time() - 3700  # epoch - 1h01m40s
+        now = time.strftime(TIME_FMT, time.gmtime(earlier))
+        resp = Mock(headers={'cache-control': 'max-age=3600',
+                             'date': now})
+        self.c.cache = DictCache({self.url: resp})
+        r = self.req({})
+        assert not r
+
+    def test_cache_request_fresh_expires(self):
+        later = time.time() + 86400  # GMT + 1 day
+        expires = time.strftime(TIME_FMT, time.gmtime(later))
+        now = time.strftime(TIME_FMT, time.gmtime())
+        resp = Mock(headers={'expires': expires,
+                             'date': now})
+        cache = DictCache({self.url: resp})
+        self.c.cache = cache
+        r = self.req({})
+        assert r == resp
+
+    def test_cache_request_unfresh_expires(self):
+        sooner = time.time() - 86400  # GMT - 1 day
+        expires = time.strftime(TIME_FMT, time.gmtime(sooner))
+        now = time.strftime(TIME_FMT, time.gmtime())
+        resp = Mock(headers={'expires': expires,
+                             'date': now})
+        cache = DictCache({self.url: resp})
+        self.c.cache = cache
+        r = self.req({})
+        assert not r
+
+# VIM MODLINE
+# vim: ai ts=4 sw=4 sts=4 expandtab
+