X-Git-Url: http://git.kaliko.me/?a=blobdiff_plain;f=sid%2Ffeeds.py;h=64ddf1217300b548ded94d947a14cc22aba3b9af;hb=6a59e4d7abd60a2e785f3e4bfcc3189ad43122e3;hp=a40f7662e8f14383d1d55089dd12f59fa91b82b5;hpb=f2f44c64470d0967913feb1bfb9868fa0c7d9e1b;p=sid.git
diff --git a/sid/feeds.py b/sid/feeds.py
index a40f766..64ddf12 100644
--- a/sid/feeds.py
+++ b/sid/feeds.py
@@ -52,62 +52,68 @@ class FeedMonitor(threading.Thread):
self.seen = dict()
self.thread_killed = False
+ def _update_cache(self, feed, parsed):
+ self.seen[feed].update({'ids': {p.id for p in parsed.entries} or {}})
+ # Common HTTP caching
+ if parsed.get('etag', False):
+ self.seen[feed].update({'cache': {'etag': parsed.etag}})
+ if parsed.get('modified', False):
+ self.seen[feed].update({'cache': {'modified': parsed.modified}})
+
def new_posts(self, feed):
"""Send new posts in feed"""
- parsed_feed = feed_parse(feed)
-
+ self.plugin.log.debug('feed: : "%s"', feed)
+ if self.seen.get(feed) and self.seen.get(feed).get('cache'):
+ parsed_feed = feed_parse(feed, **self.seen[feed]['cache'])
+ else:
+ if self.seen.get(feed):
+ self.plugin.log.debug('No cache headers set (etag/modified)')
+ parsed_feed = feed_parse(feed)
# Cannot resolve address
if 'status' not in parsed_feed:
self.plugin.log.error('Error from "%s": %s.',
feed, parsed_feed.bozo_exception.__repr__())
return
-
+ # HTTP caching: feed not modified since the last fetch, nothing to do
+ if parsed_feed.status == 304:
+ self.plugin.log.debug('Got 304 not modified')
+ return
# unusual return http code
if parsed_feed.status != 200:
self.plugin.log.warning(
'Got code %(status)d from "%(href)s" (please update).',
parsed_feed)
return
-
- feed_updated = parsed_feed.feed.get('updated_parsed', None)
-
- # Avoid looping over all posts if possible
- if feed_updated and strtm_to_dtm(feed_updated) < self.last_check:
- self.plugin.log.debug('updated : %s', strtm_to_dtm(feed_updated))
- self.plugin.log.debug('last check: %s', self.last_check)
+ if not self.seen.setdefault(feed):
+ # Fill with the current post ids on first run (prevents posting all
+ # entries at startup)
+ self.seen[feed] = {'cache': None}
+ self._update_cache(feed, parsed_feed)
return
-
title = '"%s":' % parsed_feed.feed.get('title', 'n/a')
xtitle = '%s:' % html_escape(
parsed_feed.feed.get('title', 'n/a'))
text = [title]
xhtml = [xtitle]
- feed_id = parsed_feed.feed.get('id', feed)
- if not self.seen.setdefault(feed_id):
- # Fills with post id when first started (prevent from posting all
- # entries at startup)
- self.seen[feed_id] = {p.id for p in parsed_feed.entries}
- return
# Detecting new post
entries = {p.id for p in parsed_feed.entries}
+ seen_ids = self.seen.get(feed).get('ids')
new_entries = [p for p in parsed_feed.entries
- if p.id in entries - self.seen.get(feed_id)]
+ if p.id in entries - seen_ids]
for post in new_entries:
self.plugin.log.info(post.title)
-
body = '%(title)s %(link)s' % post
text.append(body)
-
xpost = {'title': html_escape(post.get('title', 'n/a'))}
xpost['link'] = html_escape(post.get('link',))
xbody = '{title}'.format(**xpost)
xhtml.append(xbody)
- # Updating self.seen
- self.seen[feed_id] = entries
+ # Updating self.seen, entries and cache headers
+ self._update_cache(feed, parsed_feed)
if len(text) > 1:
self.plugin.send(self.plugin.bot.room,
- {'mhtml':'
'.join(xhtml), 'mbody':'\n'.join(text)},
+ {'mhtml': '
'.join(xhtml), 'mbody': '\n'.join(text)},
mtype='groupchat')
def run(self):
@@ -130,20 +136,13 @@ class FeedMonitor(threading.Thread):
class Feeds(Plugin):
TEMPO = 60
FEEDS = [
- # not working
- # 'http://www.debian.org/security/dsa',
-
- # not working
- # 'http://www.debian.org/News/news',
-
- # DPN in french
- 'http://www.debian.org/News/weekly/dwn.fr.rdf',
-
+ 'https://www.debian.org/security/dsa',
+ 'https://www.debian.org/News/news',
+ # Some packages
+ 'https://tracker.debian.org/pkg/prosody/rss',
+ 'https://tracker.debian.org/pkg/ejabberd/rss',
# Misc
- 'http://rss.gmane.org/topics/excerpts/gmane.linux.debian.devel.announce',
- 'http://rss.gmane.org/gmane.linux.debian.user.security.announce',
- 'http://planet-fr.debian.net/users/rss20.xml',
- 'http://planet.debian.org/atom.xml',
+ 'https://planet.debian.org/atom.xml',
]
def __init__(self, bot):
@@ -165,9 +164,7 @@ class Feeds(Plugin):
return
html = ['{1}'.format(html_escape(u),
html_escape(u[7:])
- ) for u in Feeds.FEEDS]
- msg = {
- 'mbody': 'Feeds:\n' + '\n'.join(Feeds.FEEDS),
- 'mhtml': 'Feeds:
' + '
'.join(html),
- }
+ ) for u in Feeds.FEEDS]
+ msg = {'mbody': 'Feeds:\n' + '\n'.join(Feeds.FEEDS),
+ 'mhtml': 'Feeds:
' + '
'.join(html)}
self.reply(rcv, msg)