From: kaliko Date: Fri, 14 Nov 2014 18:26:18 +0000 (+0100) Subject: Memory efficient new entries detection X-Git-Tag: 0.1.0~51 X-Git-Url: http://git.kaliko.me/?p=sid.git;a=commitdiff_plain;h=9b72db4533b5443c7f9e25d2b6050d9e4642ce7d Memory efficient new entries detection --- diff --git a/sid/feeds.py b/sid/feeds.py index 043c784..cf77ae0 100644 --- a/sid/feeds.py +++ b/sid/feeds.py @@ -17,6 +17,7 @@ import datetime import threading import time +import traceback from feedparser import parse as feed_parse @@ -92,24 +93,28 @@ class FeedMonitor(threading.Thread): if not self.seen.setdefault(feed_id): # Fills with post id when first started (prevent from posting all # entries at startup) - self.seen[feed_id] = [post.id for post in parsed_feed.entries] - return - - for post in parsed_feed.entries: - if post.id not in self.seen.get(feed_id): - self.seen[feed_id].append(post.id) - self.bot.log.info(post.title) - - body = '%(title)s %(link)s' % post - text.append(body) - - xpost = dict(**post) - xpost['title'] = html_escape(xpost.get('title', 'n/a')) - xbody = '%(title)s' % xpost - xhtml.append(xbody) - + self.seen[feed_id] = {p.id for p in parsed_feed.entries} + #return + + # Detecting new post + entries = {p.id for p in parsed_feed.entries} + new_entries = [p for p in parsed_feed.entries + if p.id in entries - self.seen.get(feed_id)] + for post in new_entries: + self.bot.log.info(post.title) + + body = '%(title)s %(link)s' % post + text.append(body) + + xpost = dict(**post) + xpost['title'] = html_escape(xpost.get('title', 'n/a')) + xbody = '%(title)s' % xpost + xhtml.append(xbody) + # Updating self.seen + self.seen[feed_id] = entries if len(text) > 1: - self.send(('
'.join(xhtml), '\n'.join(text))) + self.bot.log.debug('
'.join(xhtml)) + self.send(('
'.join(xhtml), '\n'.join(text))) def run(self): while not self.thread_killed: @@ -118,8 +123,8 @@ class FeedMonitor(threading.Thread): try: self.new_posts(feed) except Exception as err: - self.bot.log.error('feeds thread crashed') - self.bot.log.error(err) + self.bot.log.error('feeds thread crashed: %s' % err) + self.bot.log.error(''.join(traceback.format_exc())) self.thread_killed = True self.last_check = datetime.datetime.utcnow() for _ in list(range(self.tempo)): @@ -145,6 +150,7 @@ class Feeds(Plugin): 'http://rss.gmane.org/gmane.linux.debian.user.security.announce', 'http://planet-fr.debian.net/users/rss20.xml', 'http://planet.debian.org/atom.xml', + 'http://rss.gmane.org/gmane.linux.debian.devel.general', ] def __init__(self, bot):