From: kaliko Date: Fri, 21 Feb 2014 00:53:07 +0000 (+0100) Subject: Integrate persistent cache with "var_dir" conf option X-Git-Tag: mpd-sima/0.12.0pr4~13 X-Git-Url: https://git.kaliko.me/?a=commitdiff_plain;h=71500abd7ef16784d027a8a20aa28b06e8a13a4f;p=mpd-sima.git Integrate persistent cache with "var_dir" conf option --- diff --git a/sima/lib/httpcli/cache.py b/sima/lib/cache.py similarity index 66% rename from sima/lib/httpcli/cache.py rename to sima/lib/cache.py index 22c751d..ebed3fc 100644 --- a/sima/lib/httpcli/cache.py +++ b/sima/lib/cache.py @@ -1,3 +1,22 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2014 Jack Kaliko +# Copyright (c) 2012, 2013 Eric Larson +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# """ The cache object API for implementing caches. The default is just a dictionary, which in turns means it is not threadsafe for writing. 
@@ -11,7 +30,7 @@ from hashlib import md5 from pickle import load, dump from threading import Lock -from .filelock import FileLock +from ..utils.filelock import FileLock class BaseCache: diff --git a/sima/lib/httpcli/controller.py b/sima/lib/http.py similarity index 83% rename from sima/lib/httpcli/controller.py rename to sima/lib/http.py index c447895..1040c2a 100644 --- a/sima/lib/httpcli/controller.py +++ b/sima/lib/http.py @@ -1,3 +1,22 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2014 Jack Kaliko +# Copyright (c) 2012, 2013 Eric Larson +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# """ The httplib2 algorithms ported for use with requests. """ @@ -5,9 +24,10 @@ import re import calendar import time -from sima.lib.httpcli.cache import DictCache import email.utils +from .cache import DictCache + URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") @@ -30,7 +50,7 @@ class CacheController(object): def _urlnorm(self, uri): """Normalize the URL to create a safe key for the cache""" - (scheme, authority, path, query, fragment) = parse_uri(uri) + (scheme, authority, path, query, _) = parse_uri(uri) if not scheme or not authority: raise Exception("Only absolute URIs are allowed. 
uri = %s" % uri) authority = authority.lower() @@ -56,10 +76,8 @@ class CacheController(object): """ retval = {} + # requests provides a CaseInsensitiveDict as headers cc_header = 'cache-control' - if 'Cache-Control' in headers: - cc_header = 'Cache-Control' - if cc_header in headers: parts = headers[cc_header].split(',') parts_with_args = [ @@ -71,6 +89,8 @@ class CacheController(object): return retval def cached_request(self, url, headers): + """Return the cached request if available and fresh + """ cache_url = self.cache_url(url) cc = self.parse_cache_control(headers) @@ -153,25 +173,11 @@ class CacheController(object): resp.from_cache = True return resp - # we're not fresh. If we don't have an Etag, clear it out - if 'etag' not in resp.headers: - self.cache.delete(cache_url) - - if 'etag' in resp.headers: - headers['If-None-Match'] = resp.headers['ETag'] - - if 'last-modified' in resp.headers: - headers['If-Modified-Since'] = resp.headers['Last-Modified'] - + # we're not fresh. + self.cache.delete(cache_url) # return the original handler return False - def add_headers(self, url): - resp = self.cache.get(url) - if resp and 'etag' in resp.headers: - return {'If-None-Match': resp.headers['etag']} - return {} - def cache_response(self, request, resp): """ Algorithm for caching requests. 
@@ -184,26 +190,22 @@ class CacheController(object): return cc_req = self.parse_cache_control(request.headers) - cc = self.parse_cache_control(resp.headers) + cc_resp = self.parse_cache_control(resp.headers) cache_url = self.cache_url(request.url) # Delete it from the cache if we happen to have it stored there - no_store = cc.get('no-store') or cc_req.get('no-store') + no_store = cc_resp.get('no-store') or cc_req.get('no-store') if no_store and self.cache.get(cache_url): self.cache.delete(cache_url) - # If we've been given an etag, then keep the response - if self.cache_etags and 'etag' in resp.headers: - self.cache.set(cache_url, resp) - # Add to the cache if the response headers demand it. If there # is no date header then we can't do anything about expiring # the cache. - elif 'date' in resp.headers: + if 'date' in resp.headers: # cache when there is a max-age > 0 - if cc and cc.get('max-age'): - if int(cc['max-age']) > 0: + if cc_resp and cc_resp.get('max-age'): + if int(cc_resp['max-age']) > 0: self.cache.set(cache_url, resp) # If the request can expire, it means we should cache it diff --git a/sima/lib/httpcli/__init__.py b/sima/lib/httpcli/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/sima/lib/httpcli/filelock.py b/sima/lib/httpcli/filelock.py deleted file mode 100644 index 6dc331b..0000000 --- a/sima/lib/httpcli/filelock.py +++ /dev/null @@ -1,80 +0,0 @@ -# -*- coding: utf-8 -*- -# https://github.com/dmfrey/FileLock - -import os -import time -import errno - -class FileLockException(Exception): - pass - -class FileLock: - """ A file locking mechanism that has context-manager support so - you can use it in a with statement. This should be relatively cross - compatible as it doesn't rely on msvcrt or fcntl for the locking. - """ - - def __init__(self, file_name, timeout=10, delay=.05): - """ Prepare the file locker. Specify the file to lock and optionally - the maximum timeout and the delay between each attempt to lock. 
- """ - self.is_locked = False - self.lockfile = os.path.join(os.getcwd(), "%s.lock" % file_name) - self.file_name = file_name - self.timeout = timeout - self.delay = delay - - - def acquire(self): - """ Acquire the lock, if possible. If the lock is in use, it check again - every `wait` seconds. It does this until it either gets the lock or - exceeds `timeout` number of seconds, in which case it throws - an exception. - """ - start_time = time.time() - while True: - try: - self.fd = os.open(self.lockfile, os.O_CREAT|os.O_EXCL|os.O_RDWR) - break; - except OSError as e: - if e.errno != errno.EEXIST: - raise - if (time.time() - start_time) >= self.timeout: - raise FileLockException("Timeout occured.") - time.sleep(self.delay) - self.is_locked = True - - - def release(self): - """ Get rid of the lock by deleting the lockfile. - When working in a `with` statement, this gets automatically - called at the end. - """ - if self.is_locked: - os.close(self.fd) - os.unlink(self.lockfile) - self.is_locked = False - - - def __enter__(self): - """ Activated when used in the with statement. - Should automatically acquire a lock to be used in the with block. - """ - if not self.is_locked: - self.acquire() - return self - - - def __exit__(self, type, value, traceback): - """ Activated at the end of the with statement. - It automatically releases the lock if it isn't locked. - """ - if self.is_locked: - self.release() - - - def __del__(self): - """ Make sure that the FileLock instance doesn't leave a lockfile - lying around. 
- """ - self.release() diff --git a/sima/lib/simaecho.py b/sima/lib/simaecho.py index 592ea03..caf8b5b 100644 --- a/sima/lib/simaecho.py +++ b/sima/lib/simaecho.py @@ -32,8 +32,7 @@ from requests import Session, Request, Timeout, ConnectionError from sima import ECH from sima.lib.meta import Artist from sima.lib.track import Track -from sima.lib.httpcli.controller import CacheController -from sima.lib.httpcli.cache import FileCache +from sima.lib.http import CacheController from sima.utils.utils import WSError, WSNotFound, WSTimeout, WSHTTPError from sima.utils.utils import getws, Throttle if len(ECH.get('apikey')) == 23: # simple hack allowing imp.reload @@ -51,7 +50,7 @@ class SimaEch: timestamp = datetime.utcnow() ratelimit = None name = 'EchoNest' - cache = FileCache('/home/kaliko/.local/share/mpd_sima/http') + cache = False def __init__(self): self._ressource = None diff --git a/sima/plugins/internal/echonest.py b/sima/plugins/internal/echonest.py index 7d4c0e0..ad3a6f9 100644 --- a/sima/plugins/internal/echonest.py +++ b/sima/plugins/internal/echonest.py @@ -22,12 +22,14 @@ Fetching similar artists from echonest web services """ # standard library import +from os.path import join # third parties components # local import from ...lib.simaecho import SimaEch from ...lib.webserv import WebService +from ...lib.cache import FileCache class EchoNest(WebService): @@ -36,6 +38,9 @@ class EchoNest(WebService): def __init__(self, daemon): WebService.__init__(self, daemon) + # Set persitent cache + vardir = daemon.config['sima']['var_dir'] + SimaEch.cache = FileCache(join(vardir, 'http')) self.ws = SimaEch # VIM MODLINE diff --git a/sima/utils/filelock.py b/sima/utils/filelock.py new file mode 100644 index 0000000..8f7065f --- /dev/null +++ b/sima/utils/filelock.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2009 Evan Fosmark +# Copyright (c) 2014 Jack Kaliko +# +# This program is free software: you can redistribute it and/or modify +# it under the 
terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# + +# https://github.com/dmfrey/FileLock +""" +Plain file lock to use in context: + >>> with FileLock('/path/to/file/to/write'): + >>> # a lock file is maintained within the scope of this context: + >>> # /path/to/file/to/write.lock + >>> ... # process file writing +""" + +import errno +import os +import time + +class FileLockException(Exception): + """FileLock Exception""" + pass + +class FileLock: + """ A plain file lock with context-manager""" + + def __init__(self, file_name, timeout=10, delay=.05): + """ + Setup file lock. + Setup timeout and the delay. + """ + self.filedsc = None + self.is_locked = False + dirname = os.path.dirname(file_name) + self.lockfile = os.path.join(dirname, '{0}.lock'.format(file_name)) + self.file_name = file_name + self.timeout = timeout + self.delay = delay + + def acquire(self): + """Acquire the lock, if possible. + """ + start_time = time.time() + while True: + try: + self.filedsc = os.open(self.lockfile, + os.O_CREAT|os.O_EXCL|os.O_RDWR) + break + except OSError as err: + if err.errno != errno.EEXIST: + raise + if (time.time() - start_time) >= self.timeout: + raise FileLockException('Timeout occured.') + time.sleep(self.delay) + self.is_locked = True + + def release(self): + """Release the lock. + """ + if self.is_locked: + os.close(self.filedsc) + os.unlink(self.lockfile) + self.is_locked = False + + def __enter__(self): + """start of the with statement. 
+ """ + if not self.is_locked: + self.acquire() + return self + + def __exit__(self, type, value, traceback): + """end of the with statement + """ + if self.is_locked: + self.release() + + def __del__(self): + """Cleanup + """ + self.release()