kaliko git repositories - sid.git/commitdiff
Memory efficient new entries detection
author: kaliko <kaliko@azylum.org>
Fri, 14 Nov 2014 18:26:18 +0000 (19:26 +0100)
committer: kaliko <kaliko@azylum.org>
Fri, 14 Nov 2014 18:26:18 +0000 (19:26 +0100)
sid/feeds.py

index 043c784dcb23f4cf0484bee46badee0f2e370e27..cf77ae023aebf22a0ca7e7ebe20e2fe7a7b628af 100644 (file)
@@ -17,6 +17,7 @@
 import datetime
 import threading
 import time
+import traceback
 
 from feedparser import parse as feed_parse
 
@@ -92,24 +93,28 @@ class FeedMonitor(threading.Thread):
         if not self.seen.setdefault(feed_id):
             # Fills with post id when first started (prevent from posting all
             # entries at startup)
-            self.seen[feed_id] = [post.id for post in parsed_feed.entries]
-            return
-
-        for post in parsed_feed.entries:
-            if post.id not in self.seen.get(feed_id):
-                self.seen[feed_id].append(post.id)
-                self.bot.log.info(post.title)
-
-                body = '%(title)s %(link)s' % post
-                text.append(body)
-
-                xpost = dict(**post)
-                xpost['title'] = html_escape(xpost.get('title', 'n/a'))
-                xbody = '<a href="%(link)s">%(title)s</a>' % xpost
-                xhtml.append(xbody)
-
+            self.seen[feed_id] = {p.id for p in parsed_feed.entries}
+            #return
+
+        # Detecting new post
+        entries = {p.id for p in parsed_feed.entries}
+        new_entries = [p for p in parsed_feed.entries
+                       if p.id in entries - self.seen.get(feed_id)]
+        for post in new_entries:
+            self.bot.log.info(post.title)
+
+            body = '%(title)s %(link)s' % post
+            text.append(body)
+
+            xpost = dict(**post)
+            xpost['title'] = html_escape(xpost.get('title', 'n/a'))
+            xbody = '<a href="%(link)s">%(title)s</a>' % xpost
+            xhtml.append(xbody)
+        # Updating self.seen
+        self.seen[feed_id] = entries
         if len(text) > 1:
-            self.send(('<br/>'.join(xhtml), '\n'.join(text)))
+            self.bot.log.debug('<br />'.join(xhtml))
+            self.send(('<br />'.join(xhtml), '\n'.join(text)))
 
     def run(self):
         while not self.thread_killed:
@@ -118,8 +123,8 @@ class FeedMonitor(threading.Thread):
                 try:
                     self.new_posts(feed)
                 except Exception as err:
-                    self.bot.log.error('feeds thread crashed')
-                    self.bot.log.error(err)
+                    self.bot.log.error('feeds thread crashed: %s' % err)
+                    self.bot.log.error(''.join(traceback.format_exc()))
                     self.thread_killed = True
             self.last_check = datetime.datetime.utcnow()
             for _ in list(range(self.tempo)):
@@ -145,6 +150,7 @@ class Feeds(Plugin):
        'http://rss.gmane.org/gmane.linux.debian.user.security.announce',
        'http://planet-fr.debian.net/users/rss20.xml',
        'http://planet.debian.org/atom.xml',
+       'http://rss.gmane.org/gmane.linux.debian.devel.general',
         ]
 
     def __init__(self, bot):