X-Git-Url: https://git.kaliko.me/?a=blobdiff_plain;f=sima%2Flib%2Fhttp.py;h=5014516e82cc228b06b421d2372a6f04eeb95f62;hb=HEAD;hp=10fae9c475204852b73eabe039447d4d5d4497e4;hpb=f3df46ac87d5f9d1824156d9fed605dd03f3ccde;p=mpd-sima.git diff --git a/sima/lib/http.py b/sima/lib/http.py index 10fae9c..5014516 100644 --- a/sima/lib/http.py +++ b/sima/lib/http.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright (c) 2014-2015 Jack Kaliko +# Copyright (c) 2014-2015, 2020, 2021 kaliko # Copyright (c) 2012, 2013 Eric Larson # # This program is free software: you can redistribute it and/or modify @@ -26,7 +26,7 @@ import time import email.utils -from requests import Session, Request, Timeout, ConnectionError +from requests import Session, Request, Timeout, ConnectionError as HTTPConnectionError from sima import SOCKET_TIMEOUT, WAIT_BETWEEN_REQUESTS from sima.utils.utils import WSError, WSTimeout, WSHTTPError, Throttle @@ -45,9 +45,11 @@ def parse_uri(uri): return (groups[1], groups[3], groups[4], groups[6], groups[8]) -class CacheController(object): +class CacheController: """An interface to see if request should cached or not. """ + CACHE_ANYWAY = False + def __init__(self, cache=None, cache_etags=True): self.cache = cache or DictCache() self.cache_etags = cache_etags @@ -56,7 +58,7 @@ class CacheController(object): """Normalize the URL to create a safe key for the cache""" (scheme, authority, path, query, _) = parse_uri(uri) if not scheme or not authority: - raise Exception("Only absolute URIs are allowed. uri = %s" % uri) + raise Exception(f'Only absolute URIs are allowed. uri = {uri}') authority = authority.lower() scheme = scheme.lower() if not path: @@ -64,7 +66,7 @@ class CacheController(object): # Could do syntax based normalization of the URI before # computing the digest. See Section 6.2.2 of Std 66. - request_uri = query and "?".join([path, query]) or path + request_uri = "?".join([path, query]) if query else path scheme = scheme.lower() defrag_uri = scheme + "://" + authority + request_uri @@ -99,10 +101,9 @@ class CacheController(object): cc = self.parse_cache_control(request.headers) # non-caching states - no_cache = True if 'no-cache' in cc else False + no_cache = bool('no-cache' in cc) if 'max-age' in cc and cc['max-age'] == 0: no_cache = True - # see if it is in the cache anyways in_cache = self.cache.get(cache_url) if no_cache or not in_cache: @@ -190,12 +191,6 @@ class CacheController(object): # return the original handler return False - def add_headers(self, url): - resp = self.cache.get(url) - if resp and 'etag' in resp.headers: - return {'If-None-Match': resp.headers['etag']} - return {} - def cache_response(self, request, resp): """ Algorithm for caching requests. @@ -235,6 +230,12 @@ class CacheController(object): elif 'expires' in resp.headers: if resp.headers['expires']: self.cache.set(cache_url, resp) + # Force one month max age if no Cache-Control header is found + # Overriding header disappearance on LastFM web service... + # https://gitlab.com/kaliko/sima/-/issues/7 + elif CacheController.CACHE_ANYWAY: + resp.headers['Cache-Control'] = 'max-age=2419200' + self.cache.set(cache_url, resp) def update_cached_response(self, request, response): """On a 304 we will get a new set of headers that we want to @@ -279,6 +280,7 @@ class HttpClient: """ self.stats = stats self.controller = CacheController(cache) + self.sess = Session() def __call__(self, ress, payload): req = Request('GET', ress, params=payload,).prepare() @@ -291,27 +293,21 @@ class HttpClient: return cached_response try: return self.fetch_ws(req) - except Timeout: - raise WSTimeout('Failed to reach server within {0}s'.format( - SOCKET_TIMEOUT)) - except ConnectionError as err: - raise WSError(err) + except Timeout as err: + raise WSTimeout(f'Failed to reach server within {SOCKET_TIMEOUT}s') from err + except HTTPConnectionError as err: + raise WSError(err) from err @Throttle(WAIT_BETWEEN_REQUESTS) def fetch_ws(self, prepreq): """fetch from web service""" - sess = Session() - settings = sess.merge_environment_settings(prepreq.url, {}, None, False, None) - resp = sess.send(prepreq, timeout=SOCKET_TIMEOUT, **settings) + settings = self.sess.merge_environment_settings(prepreq.url, {}, None, False, None) + resp = self.sess.send(prepreq, timeout=SOCKET_TIMEOUT, **settings) if resp.status_code == 304: self.stats.update(etag=self.stats.get('etag')+1) resp = self.controller.update_cached_response(prepreq, resp) elif resp.status_code != 200: - raise WSHTTPError('{0.status_code}: {0.reason}'.format(resp)) - ratelimit = resp.headers.get('x-ratelimit-remaining', None) - if ratelimit and self.stats: - minrl = min(int(ratelimit), self.stats.get('minrl')) - self.stats.update(minrl=minrl) + raise WSHTTPError(f'{resp.status_code}: {resp.reason}') self.controller.cache_response(resp.request, resp) return resp