From 3b790488058841bcb00180aa07277334a394b3ea Mon Sep 17 00:00:00 2001 From: kaliko Date: Fri, 21 Feb 2014 17:40:25 +0100 Subject: [PATCH] Add ETag support for echonest --- sima/lib/http.py | 60 +++++++++++++++++++++++++++++++++++++++++--- sima/lib/simaecho.py | 18 ++++++++----- sima/lib/simafm.py | 2 +- 3 files changed, 70 insertions(+), 10 deletions(-) diff --git a/sima/lib/http.py b/sima/lib/http.py index 1040c2a..8dbfe0d 100644 --- a/sima/lib/http.py +++ b/sima/lib/http.py @@ -58,6 +58,8 @@ class CacheController(object): if not path: path = "/" + # Order of params might change + query = ''.join(sorted(query.split('&'))) # Could do syntax based normalization of the URI before # computing the digest. See Section 6.2.2 of Std 66. request_uri = query and "?".join([path, query]) or path @@ -173,11 +175,25 @@ class CacheController(object): resp.from_cache = True return resp - # we're not fresh. - self.cache.delete(cache_url) + # we're not fresh. If we don't have an ETag, clear it out + if 'etag' not in resp.headers: + self.cache.delete(cache_url) + + if 'etag' in resp.headers: + headers['If-None-Match'] = resp.headers['ETag'] + + if 'last-modified' in resp.headers: + headers['If-Modified-Since'] = resp.headers['Last-Modified'] + # return the original handler return False + def add_headers(self, url): + resp = self.cache.get(url) + if resp and 'etag' in resp.headers: + return {'If-None-Match': resp.headers['etag']} + return {} + def cache_response(self, request, resp): """ Algorithm for caching requests. @@ -199,10 +215,14 @@ class CacheController(object): if no_store and self.cache.get(cache_url): self.cache.delete(cache_url) + # If we've been given an etag, then keep the response + if self.cache_etags and 'etag' in resp.headers: + self.cache.set(cache_url, resp) + # Add to the cache if the response headers demand it. If there # is no date header then we can't do anything about expiring # the cache. 
- if 'date' in resp.headers: + elif 'date' in resp.headers: # cache when there is a max-age > 0 if cc_resp and cc_resp.get('max-age'): if int(cc_resp['max-age']) > 0: @@ -213,3 +233,37 @@ class CacheController(object): elif 'expires' in resp.headers: if resp.headers['expires']: self.cache.set(cache_url, resp) + + def update_cached_response(self, request, response): + """On a 304 we will get a new set of headers that we want to + update our cached value with, assuming we have one. + + This should only ever be called when we've sent an ETag and + gotten a 304 as the response. + """ + cache_url = self.cache_url(request.url) + + resp = self.cache.get(cache_url) + + if not resp: + # we didn't have a cached response + return response + + # we did, so let's update our headers + resp.headers.update(response.headers) + + # we want a 200 b/c we have content via the cache + request.status_code = 200 + + # update the request as it has the if-none-match header + any + # other headers that the server might have updated (i.e. Date, + # Cache-Control, Expires, etc.) + resp.request = request + + # update our cache + self.cache.set(cache_url, resp) + + # Let everyone know this was from the cache. 
+ resp.from_cache = True + + return resp diff --git a/sima/lib/simaecho.py b/sima/lib/simaecho.py index 36c985e..27bbce2 100644 --- a/sima/lib/simaecho.py +++ b/sima/lib/simaecho.py @@ -25,7 +25,7 @@ __version__ = '0.0.2' __author__ = 'Jack Kaliko' -from datetime import datetime, timedelta +from datetime import timedelta from requests import Session, Request, Timeout, ConnectionError @@ -50,6 +50,7 @@ class SimaEch: ratelimit = None name = 'EchoNest' cache = False + stats = {'304':0, 'cached':0, 'minrl':'120'} def __init__(self): self.controller = CacheController(self.cache) @@ -64,6 +65,7 @@ class SimaEch: if self.cache: cached_response = self.controller.cached_request(req.url, req.headers) if cached_response: + SimaEch.stat.update(cached=SimaEch.stat.get('cached')+1) return cached_response.json() try: return self._fetch_ws(req) @@ -78,11 +80,16 @@ class SimaEch: """fetch from web service""" sess = Session() resp = sess.send(prepreq, timeout=SOCKET_TIMEOUT) - self.__class__.ratelimit = resp.headers.get('x-ratelimit-remaining', None) - if resp.status_code is not 200: + if resp.status_code == 304: + SimaEch.stats.update({'304':SimaEch.stats.get('304')+1}) + resp = self.controller.update_cached_response(prepreq, resp) + elif resp.status_code != 200: raise WSHTTPError('{0.status_code}: {0.reason}'.format(resp)) ans = resp.json() self._controls_answer(ans) + SimaEch.ratelimit = resp.headers.get('x-ratelimit-remaining', None) + minrl = min(SimaEch.ratelimit, SimaEch.stats.get('minrl')) + SimaEch.stats.update(minrl=minrl) if self.cache: self.controller.cache_response(resp.request, resp) return ans @@ -130,7 +137,6 @@ class SimaEch: ressource = '{0}/artist/similar'.format(SimaEch.root_url) ans = self._fetch(ressource, payload) for art in ans.get('response').get('artists'): - artist = {} mbid = None if 'foreign_ids' in art: for frgnid in art.get('foreign_ids'): @@ -147,7 +153,7 @@ class SimaEch: ressource = '{0}/song/search'.format(SimaEch.root_url) ans = 
self._fetch(ressource, payload) titles = list() - artist = { + art = { 'artist': artist.name, 'musicbrainz_artistid': artist.mbid, } @@ -155,7 +161,7 @@ class SimaEch: title = song.get('title') if title not in titles: titles.append(title) - yield Track(title=title, **artist) + yield Track(title=title, **art) # VIM MODLINE diff --git a/sima/lib/simafm.py b/sima/lib/simafm.py index 5dafd2c..f54ab01 100644 --- a/sima/lib/simafm.py +++ b/sima/lib/simafm.py @@ -25,7 +25,7 @@ __version__ = '0.5.1' __author__ = 'Jack Kaliko' -from datetime import datetime, timedelta +from datetime import timedelta from requests import Session, Request, Timeout, ConnectionError -- 2.39.2