# -*- coding: utf-8 -*-
-
-# Copyright (C) 2011, 2014 kaliko <kaliko@azylum.org>
-
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, version 3 only.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# SPDX-FileCopyrightText: 2011, 2014, 2020 kaliko <kaliko@azylum.org>
+# SPDX-License-Identifier: GPL-3.0-or-later
import datetime
import threading
import time
+import traceback
+
+from urllib.error import URLError
+from urllib.parse import urlparse
from feedparser import parse as feed_parse
def html_escape(text):
    """Return ``text`` with special characters replaced by their entities.

    Each character is looked up in the module-level ``html_escape_table``;
    characters without an entry are copied through unchanged.
    """
    escaped = []
    for char in text:
        escaped.append(html_escape_table.get(char, char))
    return ''.join(escaped)
def strtm_to_dtm(struc_time):
threading.Thread.__init__(self)
self.feeds_list = plugin.FEEDS
self.tempo = plugin.TEMPO
- self.bot = plugin.bot
+ self.plugin = plugin
self.last_check = datetime.datetime.utcnow()
self.seen = dict()
self.thread_killed = False
- def send(self, message):
- """simple wrapper around JabberBot().send()"""
- self.bot.log.debug(self.bot.room)
- self.bot.send_message(mto=self.bot.room,
- mbody=message[1],
- mhtml=message[0],
- mtype='groupchat')
+ def _update_cache(self, feed, parsed):
+ self.seen[feed].update({'ids': {p.id for p in parsed.entries} or {}})
+ # Common HTTP caching
+ if parsed.get('etag', False):
+ self.seen[feed].update({'cache': {'etag': parsed.etag}})
+ if parsed.get('modified', False):
+ self.seen[feed].update({'cache': {'modified': parsed.modified}})
def new_posts(self, feed):
    """Fetch ``feed`` and send the entries not seen on previous checks.

    The first successful fetch of a feed only records its entry ids, so
    nothing is posted for entries that already existed at startup. Later
    fetches reuse the stored cache headers (etag/modified) when available
    and return early on a 304 answer.

    :param feed: URL of the feed, also used as its key in ``self.seen``.
    :return: None.
    """
    log = self.plugin.log
    log.debug('feed: : "%s"', feed)
    state = self.seen.get(feed)
    cache = state.get('cache') if state else None
    if cache:
        # Conditional request using the validators saved by _update_cache
        parsed_feed = feed_parse(feed, **cache)
    else:
        if state:
            log.debug('No cache headers set (etag/modified)')
        parsed_feed = feed_parse(feed)
    # Cannot resolve address
    if 'status' not in parsed_feed:
        # .get(): do not crash the thread if no exception was recorded
        log.error('Error from "%s": %r.',
                  feed, parsed_feed.get('bozo_exception'))
        return
    # http caching
    if parsed_feed.status == 304:
        log.debug('Got 304 not modified')
        return
    # unusual return http code
    if parsed_feed.status != 200:
        log.warning('Got code %d from "%s" (please update).',
                    parsed_feed.status, parsed_feed.get('href', feed))
        return
    if not state:
        # Fills with post id when first started (prevent from posting all
        # entries at startup)
        self.seen[feed] = {'cache': None}
        self._update_cache(feed, parsed_feed)
        return
    feed_title = parsed_feed.feed.get('title', 'n/a')
    text = ['"%s":' % feed_title]
    xhtml = ['<strong>%s</strong>:' % html_escape(feed_title)]
    # Detecting new posts: the difference is computed once, and the stored
    # ids are coerced to a set so an empty/missing value cannot break it.
    seen_ids = set(state.get('ids') or ())
    new_ids = {post.id for post in parsed_feed.entries} - seen_ids
    for post in parsed_feed.entries:
        if post.id not in new_ids:
            continue
        # Entries are not guaranteed to carry a title or a link
        post_title = post.get('title', 'n/a')
        post_link = post.get('link', '')
        log.info(post_title)
        text.append('%s %s' % (post_title, post_link))
        xhtml.append('<a href="{link}">{title}</a>'.format(
            link=html_escape(post_link), title=html_escape(post_title)))
    # Updating self.seen, entries and cache headers
    self._update_cache(feed, parsed_feed)
    # text/xhtml always hold the feed title; send only if posts were added
    if len(text) > 1:
        self.plugin.send(self.plugin.bot.room,
                         {'mhtml': '<br />'.join(xhtml),
                          'mbody': '\n'.join(text)},
                         mtype='groupchat')
def run(self):
while not self.thread_killed:
- self.bot.log.info(u'feeds check')
+ self.plugin.log.debug('feeds check')
for feed in self.feeds_list:
try:
self.new_posts(feed)
- except Exception as err:
- self.bot.log.error(u'feeds thread crashed')
- self.bot.log.error(err)
+ except ConnectionError as err: # Non fatal exception
+ self.plugin.log.error('connection error on %s: %s', feed, err)
+ except URLError as err: # Non fatal exception
+ self.plugin.log.error('error for "%s": %s', feed, err.reason)
+ except Exception as err: # Unknown execption, killing thread anyway
+ self.plugin.log.error('feeds thread crashed: %s', err)
+ self.plugin.log.error(''.join(traceback.format_exc()))
self.thread_killed = True
self.last_check = datetime.datetime.utcnow()
for _ in list(range(self.tempo)):
class Feeds(Plugin):
+ """
+ .. note::
+ Feeds plugin depends on external module: **feedparser**
+ """
+
+ #: Time between feeds check
TEMPO = 60
+ #: Default feeds to monitor
FEEDS = [
- # not working <http://bugs.debian.org/612274>
- # 'http://www.debian.org/security/dsa',
-
- # not working <http://bugs.debian.org/612274>
- # 'http://www.debian.org/News/news',
-
- # DPN in french
- 'http://www.debian.org/News/weekly/dwn.fr.rdf',
-
+ 'https://www.debian.org/security/dsa',
+ 'https://www.debian.org/News/news',
+ # Some packages
+ 'https://tracker.debian.org/pkg/prosody/rss',
+ 'https://tracker.debian.org/pkg/ejabberd/rss',
# Misc
- 'http://rss.gmane.org/topics/excerpts/gmane.linux.debian.devel.announce',
- 'http://rss.gmane.org/gmane.linux.debian.user.security.announce',
- 'http://planet-fr.debian.net/users/rss20.xml',
- 'http://planet.debian.org/atom.xml',
+ 'https://planet.debian.org/atom.xml',
]
def __init__(self, bot):
self.th_mon.thread_killed = True
@botcmd
def feeds(self, rcv, args):
    """Monitors debian project related feeds.

    * ``!feeds`` : registred feeds list
    * ``!feeds last`` : last check time"""
    # NOTE(review): the docstring above is presumably exposed as the
    # command help text, so its wording is left untouched.
    #
    # rcv: the received message, handed back to self.reply().
    # args: command arguments; 'last' selects the last-check report.
    if 'last' in args:
        date = '{:%Y-%m-%d %H:%M} (utc)'.format(self.th_mon.last_check)
        self.reply(rcv, f'Last feeds check: {date}')
        return
    # Read the list through the instance, like the monitor thread does
    # (plugin.FEEDS), so an overridden list is reported accurately.
    monitored = self.FEEDS
    links = []
    for url in monitored:
        parts = urlparse(url)
        # Link label is the URL without scheme, query or fragment
        label = f'{parts.netloc}{parts.path}'
        links.append('<a href="{0}">{1}</a>'.format(
            html_escape(url), html_escape(label)))
    msg = {'mbody': 'Feeds:\n' + '\n'.join(monitored),
           'mhtml': 'Feeds:<br />' + '<br />'.join(links)}
    self.reply(rcv, msg)