kaliko git repositories - sid.git/commitdiff
feeds: use proper http caching
author kaliko <kaliko@azylum.org>
Mon, 4 May 2020 09:45:46 +0000 (11:45 +0200)
committer kaliko <kaliko@azylum.org>
Mon, 4 May 2020 09:45:46 +0000 (11:45 +0200)
sid/feeds.py

index 2cb0964646276bf0ec2e7c5529d18ecb99731b07..64ddf1217300b548ded94d947a14cc22aba3b9af 100644 (file)
@@ -52,62 +52,68 @@ class FeedMonitor(threading.Thread):
         self.seen = dict()
         self.thread_killed = False
 
+    def _update_cache(self, feed, parsed):
+        self.seen[feed].update({'ids': {p.id for p in parsed.entries} or {}})
+        # Common HTTP caching
+        if parsed.get('etag', False):
+            self.seen[feed].update({'cache': {'etag': parsed.etag}})
+        if parsed.get('modified', False):
+            self.seen[feed].update({'cache': {'modified': parsed.modified}})
+
     def new_posts(self, feed):
         """Send new posts in feed"""
-        parsed_feed = feed_parse(feed)
-
+        self.plugin.log.debug('feed:     : "%s"', feed)
+        if self.seen.get(feed) and self.seen.get(feed).get('cache'):
+            parsed_feed = feed_parse(feed, **self.seen[feed]['cache'])
+        else:
+            if self.seen.get(feed):
+                self.plugin.log.debug('No cache headers set (etag/modified)')
+            parsed_feed = feed_parse(feed)
         # Cannot resolve address
         if 'status' not in parsed_feed:
             self.plugin.log.error('Error from "%s": %s.',
                                   feed, parsed_feed.bozo_exception.__repr__())
             return
-
+        # http caching
+        if parsed_feed.status == 304:
+            self.plugin.log.debug('Got 304 not modified')
+            return
         # unusual return http code
         if parsed_feed.status != 200:
             self.plugin.log.warning(
                 'Got code %(status)d from "%(href)s" (please update).',
                 parsed_feed)
             return
-
-        feed_updated = parsed_feed.feed.get('updated_parsed', None)
-
-        # Avoid looping over all posts if possible
-        if feed_updated and strtm_to_dtm(feed_updated) < self.last_check:
-            self.plugin.log.debug('updated   : %s', strtm_to_dtm(feed_updated))
-            self.plugin.log.debug('last check: %s', self.last_check)
+        if not self.seen.setdefault(feed):
+            # Fills with post id when first started (prevent from posting all
+            # entries at startup)
+            self.seen[feed] = {'cache': None}
+            self._update_cache(feed, parsed_feed)
             return
-
         title = '"%s":' % parsed_feed.feed.get('title', 'n/a')
         xtitle = '<strong>%s</strong>:' % html_escape(
             parsed_feed.feed.get('title', 'n/a'))
         text = [title]
         xhtml = [xtitle]
-        feed_id = parsed_feed.feed.get('id', feed)
-        if not self.seen.setdefault(feed_id):
-            # Fills with post id when first started (prevent from posting all
-            # entries at startup)
-            self.seen[feed_id] = {p.id for p in parsed_feed.entries}
-            return
 
         # Detecting new post
         entries = {p.id for p in parsed_feed.entries}
+        seen_ids = self.seen.get(feed).get('ids')
         new_entries = [p for p in parsed_feed.entries
-                       if p.id in entries - self.seen.get(feed_id)]
+                       if p.id in entries - seen_ids]
         for post in new_entries:
             self.plugin.log.info(post.title)
-
             body = '%(title)s %(link)s' % post
             text.append(body)
-
             xpost = {'title': html_escape(post.get('title', 'n/a'))}
             xpost['link'] = html_escape(post.get('link',))
             xbody = '<a href="{link}">{title}</a>'.format(**xpost)
             xhtml.append(xbody)
-        # Updating self.seen
-        self.seen[feed_id] = entries
+        # Updating self.seen, entries and cache headers
+        self._update_cache(feed, parsed_feed)
         if len(text) > 1:
             self.plugin.send(self.plugin.bot.room,
-                    {'mhtml':'<br />'.join(xhtml), 'mbody':'\n'.join(text)},
+                    {'mhtml': '<br />'.join(xhtml), 'mbody': '\n'.join(text)},
                     mtype='groupchat')
 
     def run(self):
@@ -130,18 +136,13 @@ class FeedMonitor(threading.Thread):
 class Feeds(Plugin):
     TEMPO = 60
     FEEDS = [
-        # not working <http://bugs.debian.org/612274>
-        # 'http://www.debian.org/security/dsa',
-
-        # not working <http://bugs.debian.org/612274>
-        # 'http://www.debian.org/News/news',
-
+        'https://www.debian.org/security/dsa',
+        'https://www.debian.org/News/news',
         # Some packages
         'https://tracker.debian.org/pkg/prosody/rss',
         'https://tracker.debian.org/pkg/ejabberd/rss',
-
         # Misc
-        'http://planet.debian.org/atom.xml',
+        'https://planet.debian.org/atom.xml',
         ]
 
     def __init__(self, bot):
@@ -165,5 +166,5 @@ class Feeds(Plugin):
                                                html_escape(u[7:])
                                               ) for u in Feeds.FEEDS]
         msg = {'mbody': 'Feeds:\n' + '\n'.join(Feeds.FEEDS),
-               'mhtml': 'Feeds:<br />' + '<br />'.join(html),}
+               'mhtml': 'Feeds:<br />' + '<br />'.join(html)}
         self.reply(rcv, msg)
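
The caching added above relies on feedparser's support for conditional HTTP requests: passing a stored ETag or Last-Modified value back to feedparser.parse() makes it send If-None-Match / If-Modified-Since, and an unchanged feed comes back with status 304. A minimal standalone sketch of that pattern, assuming feed_parse in the diff is feedparser.parse and using a hypothetical module-level cache dict that is not part of this commit:

    # Sketch of conditional fetching with feedparser; the `cache` dict
    # here is hypothetical, standing in for self.seen[feed]['cache'].
    import feedparser

    cache = {}  # url -> {'etag': ..., 'modified': ...}

    def fetch(url):
        validators = cache.get(url, {})
        # feedparser sends If-None-Match / If-Modified-Since when these
        # keyword arguments are set (None is the default and is ignored).
        parsed = feedparser.parse(url,
                                  etag=validators.get('etag'),
                                  modified=validators.get('modified'))
        if parsed.get('status') == 304:
            return None  # not modified, nothing new to post
        # Store whatever validators the server returned for the next poll.
        if parsed.get('etag'):
            cache.setdefault(url, {})['etag'] = parsed.etag
        if parsed.get('modified'):
            cache.setdefault(url, {})['modified'] = parsed.modified
        return parsed

In the commit itself these validators are kept per feed in self.seen[feed]['cache'] by _update_cache(), and new_posts() returns early on 304 before comparing entry ids.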