kaliko git repositories - sid.git/blobdiff - sid/feeds.py
Better use of logging
[sid.git] / sid / feeds.py
index 7291af27cc72dae9bf22644466b8c2757bc5ef92..a40f7662e8f14383d1d55089dd12f59fa91b82b5 100644 (file)
@@ -17,6 +17,7 @@
 import datetime
 import threading
 import time
+import traceback
 
 from feedparser import parse as feed_parse
 
@@ -34,7 +35,7 @@ html_escape_table = {
 
 def html_escape(text):
     """Produce entities within text."""
-    return "".join(html_escape_table.get(c, c) for c in text)
+    return ''.join(html_escape_table.get(c, c) for c in text)
 
 
 def strtm_to_dtm(struc_time):
@@ -46,46 +47,38 @@ class FeedMonitor(threading.Thread):
         threading.Thread.__init__(self)
         self.feeds_list = plugin.FEEDS
         self.tempo = plugin.TEMPO
-        self.bot = plugin.bot
+        self.plugin = plugin
         self.last_check = datetime.datetime.utcnow()
         self.seen = dict()
         self.thread_killed = False
 
-    def send(self, message):
-        """simple wrapper around JabberBot().send()"""
-        self.bot.log.debug(self.bot.room)
-        self.bot.send_message(mto=self.bot.room,
-                              mbody=message[1],
-                              mhtml=message[0],
-                              mtype='groupchat')
-
     def new_posts(self, feed):
         """Send new posts in feed"""
         parsed_feed = feed_parse(feed)
 
         # Cannot resolve address
         if 'status' not in parsed_feed:
-            self.bot.log.error(u'Error from "%s": %s.' %
-                    (feed, parsed_feed.bozo_exception.__repr__()))
+            self.plugin.log.error('Error from "%s": %s.',
+                                  feed, parsed_feed.bozo_exception.__repr__())
             return
 
         # unusual return http code
         if parsed_feed.status != 200:
-            self.bot.log.error(
-                u'Got code %(status)d from "%(href)s" (please update).' %
-                    parsed_feed)
+            self.plugin.log.warning(
+                'Got code %(status)d from "%(href)s" (please update).',
+                parsed_feed)
             return
 
         feed_updated = parsed_feed.feed.get('updated_parsed', None)
 
         # Avoid looping over all posts if possible
         if feed_updated and strtm_to_dtm(feed_updated) < self.last_check:
-            self.bot.log.debug('updated   : %s' % strtm_to_dtm(feed_updated))
-            self.bot.log.debug('last check: %s' % self.last_check)
+            self.plugin.log.debug('updated   : %s', strtm_to_dtm(feed_updated))
+            self.plugin.log.debug('last check: %s', self.last_check)
             return
 
-        title = u'"%s":' % parsed_feed.feed.get('title', 'n/a')
-        xtitle = u'<strong>%s</strong>:' % html_escape(
+        title = '"%s":' % parsed_feed.feed.get('title', 'n/a')
+        xtitle = '<strong>%s</strong>:' % html_escape(
             parsed_feed.feed.get('title', 'n/a'))
         text = [title]
         xhtml = [xtitle]
@@ -93,34 +86,39 @@ class FeedMonitor(threading.Thread):
         if not self.seen.setdefault(feed_id):
             # Fills with post id when first started (prevent from posting all
             # entries at startup)
-            self.seen[feed_id] = [post.id for post in parsed_feed.entries]
+            self.seen[feed_id] = {p.id for p in parsed_feed.entries}
             return
 
-        for post in parsed_feed.entries:
-            if post.id not in self.seen.get(feed_id):
-                self.seen[feed_id].append(post.id)
-                self.bot.log.info(post.title)
-
-                body = u'%(title)s %(link)s' % post
-                text.append(body)
-
-                xpost = dict(**post)
-                xpost['title'] = html_escape(xpost.get('title', 'n/a'))
-                xbody = u'<a href="%(link)s">%(title)s</a>' % xpost
-                xhtml.append(xbody)
-
+        # Detecting new post
+        entries = {p.id for p in parsed_feed.entries}
+        new_entries = [p for p in parsed_feed.entries
+                       if p.id in entries - self.seen.get(feed_id)]
+        for post in new_entries:
+            self.plugin.log.info(post.title)
+
+            body = '%(title)s %(link)s' % post
+            text.append(body)
+
+            xpost = {'title': html_escape(post.get('title', 'n/a'))}
+            xpost['link'] = html_escape(post.get('link',))
+            xbody = '<a href="{link}">{title}</a>'.format(**xpost)
+            xhtml.append(xbody)
+        # Updating self.seen
+        self.seen[feed_id] = entries
         if len(text) > 1:
-            self.send(('<br/>'.join(xhtml), '\n'.join(text)))
+            self.plugin.send(self.plugin.bot.room,
+                    {'mhtml':'<br />'.join(xhtml), 'mbody':'\n'.join(text)},
+                    mtype='groupchat')
 
     def run(self):
         while not self.thread_killed:
-            self.bot.log.info(u'feeds check')
+            self.plugin.log.debug('feeds check')
             for feed in self.feeds_list:
                 try:
                     self.new_posts(feed)
                 except Exception as err:
-                    self.bot.log.error(u'feeds thread crashed')
-                    self.bot.log.error(err)
+                    self.plugin.log.error('feeds thread crashed: %s', err)
+                    self.plugin.log.error(''.join(traceback.format_exc()))
                     self.thread_killed = True
             self.last_check = datetime.datetime.utcnow()
             for _ in list(range(self.tempo)):
@@ -158,10 +156,18 @@ class Feeds(Plugin):
         self.th_mon.thread_killed = True
 
     @botcmd
-    def feeds(self, message, args):
+    def feeds(self, rcv, args):
         """feeds monitors debian project related feeds.
         !feeds : registred feeds list
         !feeds last : last check time"""
         if 'last' in args:
-            return u'Last feeds check: %s' % self.th_mon.last_check
-        return u'\n'.join(Feeds.FEEDS)
+            self.reply(rcv, 'Last feeds check: %s' % self.th_mon.last_check)
+            return
+        html = ['<a href="{0}">{1}</a>'.format(html_escape(u),
+                                               html_escape(u[7:])
+                                               ) for u in Feeds.FEEDS]
+        msg = {
+                'mbody': 'Feeds:\n' + '\n'.join(Feeds.FEEDS),
+                'mhtml': 'Feeds:<br />' + '<br />'.join(html),
+                }
+        self.reply(rcv, msg)