From fc1ed37fe65aa46c7dce71c7fe22cd6e47bf5a10 Mon Sep 17 00:00:00 2001 From: kaliko Date: Mon, 4 May 2020 11:45:46 +0200 Subject: [PATCH] feeds: use proper http caching --- sid/feeds.py | 65 ++++++++++++++++++++++++++-------------------------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/sid/feeds.py b/sid/feeds.py index 2cb0964..64ddf12 100644 --- a/sid/feeds.py +++ b/sid/feeds.py @@ -52,62 +52,68 @@ class FeedMonitor(threading.Thread): self.seen = dict() self.thread_killed = False + def _update_cache(self, feed, parsed): + self.seen[feed].update({'ids': {p.id for p in parsed.entries} or {}}) + # Common HTTP caching + if parsed.get('etag', False): + self.seen[feed].update({'cache': {'etag': parsed.etag}}) + if parsed.get('modified', False): + self.seen[feed].update({'cache': {'modified': parsed.modified}}) + def new_posts(self, feed): """Send new posts in feed""" - parsed_feed = feed_parse(feed) - + self.plugin.log.debug('feed: : "%s"', feed) + if self.seen.get(feed) and self.seen.get(feed).get('cache'): + parsed_feed = feed_parse(feed, **self.seen[feed]['cache']) + else: + if self.seen.get(feed): + self.plugin.log.debug('No cache headers set (etag/modified)') + parsed_feed = feed_parse(feed) # Cannot resolve address if 'status' not in parsed_feed: self.plugin.log.error('Error from "%s": %s.', feed, parsed_feed.bozo_exception.__repr__()) return - + # http caching + if parsed_feed.status == 304: + self.plugin.log.debug('Got 304 not modified') + return # unusual return http code if parsed_feed.status != 200: self.plugin.log.warning( 'Got code %(status)d from "%(href)s" (please update).', parsed_feed) return - - feed_updated = parsed_feed.feed.get('updated_parsed', None) - - # Avoid looping over all posts if possible - if feed_updated and strtm_to_dtm(feed_updated) < self.last_check: - self.plugin.log.debug('updated : %s', strtm_to_dtm(feed_updated)) - self.plugin.log.debug('last check: %s', self.last_check) + if not self.seen.setdefault(feed): + # Fills with post id when first started (prevent from posting all + # entries at startup) + self.seen[feed] = {'cache': None} + self._update_cache(feed, parsed_feed) return - title = '"%s":' % parsed_feed.feed.get('title', 'n/a') xtitle = '%s:' % html_escape( parsed_feed.feed.get('title', 'n/a')) text = [title] xhtml = [xtitle] - feed_id = parsed_feed.feed.get('id', feed) - if not self.seen.setdefault(feed_id): - # Fills with post id when first started (prevent from posting all - # entries at startup) - self.seen[feed_id] = {p.id for p in parsed_feed.entries} - return # Detecting new post entries = {p.id for p in parsed_feed.entries} + seen_ids = self.seen.get(feed).get('ids') new_entries = [p for p in parsed_feed.entries - if p.id in entries - self.seen.get(feed_id)] + if p.id in entries - seen_ids] for post in new_entries: self.plugin.log.info(post.title) - body = '%(title)s %(link)s' % post text.append(body) - xpost = {'title': html_escape(post.get('title', 'n/a'))} xpost['link'] = html_escape(post.get('link',)) xbody = '{title}'.format(**xpost) xhtml.append(xbody) - # Updating self.seen - self.seen[feed_id] = entries + # Updating self.seen, entries and cache headers + self._update_cache(feed, parsed_feed) if len(text) > 1: self.plugin.send(self.plugin.bot.room, - {'mhtml':'
'.join(xhtml), 'mbody':'\n'.join(text)}, + {'mhtml': '
'.join(xhtml), 'mbody': '\n'.join(text)}, mtype='groupchat') def run(self): @@ -130,18 +136,13 @@ class FeedMonitor(threading.Thread): class Feeds(Plugin): TEMPO = 60 FEEDS = [ - # not working - # 'http://www.debian.org/security/dsa', - - # not working - # 'http://www.debian.org/News/news', - + 'https://www.debian.org/security/dsa', + 'https://www.debian.org/News/news', # Some packages 'https://tracker.debian.org/pkg/prosody/rss', 'https://tracker.debian.org/pkg/ejabberd/rss', - # Misc - 'http://planet.debian.org/atom.xml', + 'https://planet.debian.org/atom.xml', ] def __init__(self, bot): @@ -165,5 +166,5 @@ class Feeds(Plugin): html_escape(u[7:]) ) for u in Feeds.FEEDS] msg = {'mbody': 'Feeds:\n' + '\n'.join(Feeds.FEEDS), - 'mhtml': 'Feeds:
' + '
'.join(html),} + 'mhtml': 'Feeds:
' + '
'.join(html)} self.reply(rcv, msg) -- 2.39.2