# -*- coding: utf-8 -*-
-# Copyright (c) 2014-2015 Jack Kaliko <kaliko@azylum.org>
+# Copyright (c) 2014-2015, 2020, 2021 kaliko <kaliko@azylum.org>
# Copyright (c) 2012, 2013 Eric Larson <eric@ionrock.org>
#
# This program is free software: you can redistribute it and/or modify
import email.utils
-from requests import Session, Request, Timeout, ConnectionError
+from requests import Session, Request, Timeout, ConnectionError as HTTPConnectionError
from sima import SOCKET_TIMEOUT, WAIT_BETWEEN_REQUESTS
from sima.utils.utils import WSError, WSTimeout, WSHTTPError, Throttle
return (groups[1], groups[3], groups[4], groups[6], groups[8])
-class CacheController(object):
+class CacheController:
"""An interface to see if request should cached or not.
"""
+ CACHE_ANYWAY = False
+
def __init__(self, cache=None, cache_etags=True):
self.cache = cache or DictCache()
self.cache_etags = cache_etags
"""Normalize the URL to create a safe key for the cache"""
(scheme, authority, path, query, _) = parse_uri(uri)
if not scheme or not authority:
- raise Exception("Only absolute URIs are allowed. uri = %s" % uri)
+ raise Exception(f'Only absolute URIs are allowed. uri = {uri}')
authority = authority.lower()
scheme = scheme.lower()
if not path:
# Could do syntax based normalization of the URI before
# computing the digest. See Section 6.2.2 of Std 66.
- request_uri = query and "?".join([path, query]) or path
+ request_uri = "?".join([path, query]) if query else path
scheme = scheme.lower()
defrag_uri = scheme + "://" + authority + request_uri
cc = self.parse_cache_control(request.headers)
# non-caching states
- no_cache = True if 'no-cache' in cc else False
+ no_cache = bool('no-cache' in cc)
if 'max-age' in cc and cc['max-age'] == 0:
no_cache = True
-
# see if it is in the cache anyways
in_cache = self.cache.get(cache_url)
if no_cache or not in_cache:
elif 'expires' in resp.headers:
if resp.headers['expires']:
self.cache.set(cache_url, resp)
+ # Force a 28-day max-age (2419200 s) if no Cache-Control header is found.
+ # Works around the header disappearing from LastFM web service responses.
+ # https://gitlab.com/kaliko/sima/-/issues/7
+ elif CacheController.CACHE_ANYWAY:
+ resp.headers['Cache-Control'] = 'max-age=2419200'
+ self.cache.set(cache_url, resp)
def update_cached_response(self, request, response):
"""On a 304 we will get a new set of headers that we want to
"""
self.stats = stats
self.controller = CacheController(cache)
+ self.sess = Session()
def __call__(self, ress, payload):
req = Request('GET', ress, params=payload,).prepare()
return cached_response
try:
return self.fetch_ws(req)
- except Timeout:
- raise WSTimeout('Failed to reach server within {0}s'.format(
- SOCKET_TIMEOUT))
- except ConnectionError as err:
- raise WSError(err)
+ except Timeout as err:
+ raise WSTimeout(f'Failed to reach server within {SOCKET_TIMEOUT}s') from err
+ except HTTPConnectionError as err:
+ raise WSError(err) from err
@Throttle(WAIT_BETWEEN_REQUESTS)
def fetch_ws(self, prepreq):
"""fetch from web service"""
- sess = Session()
- settings = sess.merge_environment_settings(prepreq.url, {}, None, False, None)
- resp = sess.send(prepreq, timeout=SOCKET_TIMEOUT, **settings)
+ settings = self.sess.merge_environment_settings(prepreq.url, {}, None, False, None)
+ resp = self.sess.send(prepreq, timeout=SOCKET_TIMEOUT, **settings)
if resp.status_code == 304:
self.stats.update(etag=self.stats.get('etag')+1)
resp = self.controller.update_cached_response(prepreq, resp)
elif resp.status_code != 200:
- raise WSHTTPError('{0.status_code}: {0.reason}'.format(resp))
- ratelimit = resp.headers.get('x-ratelimit-remaining', None)
- if ratelimit and self.stats:
- minrl = min(int(ratelimit), self.stats.get('minrl'))
- self.stats.update(minrl=minrl)
+ raise WSHTTPError(f'{resp.status_code}: {resp.reason}')
self.controller.cache_response(resp.request, resp)
return resp