1 # -*- coding: utf-8 -*-
2 # SPDX-FileCopyrightText: 2011, 2014, 2020 kaliko <kaliko@azylum.org>
3 # SPDX-License-Identifier: GPL-3.0-or-later
4 """Publish news from various Debian feeds (security, planet, package tracker, see :py:obj:`sid.feeds.Feeds.FEEDS` for defaults).
6 Can easily be used for other feeds (rss, atom).
9 Feeds plugin depends on external module: **feedparser**
11 >>> from sid.feeds import Feeds
12 >>> # Time between check in seconds
14 >>> # Feeds to monitor, cf. sid.feeds.Feeds.FEEDS for defaults
16 'https://example.org/feeds/atom/news.atom.xml'
25 from urllib.error import URLError
26 from urllib.parse import urlparse
28 from feedparser import parse as feed_parse
30 from .plugin import Plugin, botcmd
def html_escape(text):
    """Return *text* with characters replaced by their HTML entities.

    Each character of *text* is looked up in ``html_escape_table``;
    characters without an entry are passed through unchanged.
    """
    escaped = []
    for char in text:
        # Fall back to the character itself when no entity is registered.
        escaped.append(html_escape_table.get(char, char))
    return ''.join(escaped)
def strtm_to_dtm(struc_time):
    """Build a naive ``datetime.datetime`` from a struct_time-like sequence.

    Only the leading fields (at most six: year, month, day, hour, minute,
    second) are used; anything after them is ignored.
    """
    date_fields = struc_time[:6]
    return datetime.datetime(*date_fields)
51 class FeedMonitor(threading.Thread):
52 def __init__(self, plugin):
53 threading.Thread.__init__(self)
54 self.feeds_list = plugin.FEEDS
55 self.tempo = plugin.TEMPO
57 self.last_check = datetime.datetime.utcnow()
59 self.thread_killed = False
def _update_cache(self, feed, parsed):
    """Remember the entry ids and HTTP cache validators of a parsed feed.

    :param feed: feed URL, used as key in ``self.seen`` (the entry for
        this feed must already exist, otherwise ``KeyError`` is raised).
    :param parsed: result of ``feed_parse``; read through ``.entries``
        and ``.get('etag')`` / ``.get('modified')``.

    ``self.seen[feed]['ids']`` is replaced by the set of ids currently
    published.  ``self.seen[feed]['cache']`` is replaced by the validators
    found in *parsed*; when the response carries none, the previously
    stored value is left untouched.
    """
    state = self.seen[feed]
    # Always store a set, even for a feed without entries: new_posts()
    # computes ``entries - seen_ids`` and a dict operand would raise
    # TypeError there.
    state['ids'] = {post.id for post in parsed.entries}
    # Collect both validators in one dict so an ETag is not discarded when
    # a Last-Modified value is present as well; the dict is later expanded
    # as keyword arguments to feed_parse().
    cache = {}
    for header in ('etag', 'modified'):
        value = parsed.get(header)
        if value:
            cache[header] = value
    if cache:
        state['cache'] = cache
69 def new_posts(self, feed):
70 """Send new posts in feed"""
71 self.plugin.log.debug('feed: : "%s"', feed)
72 if self.seen.get(feed) and self.seen.get(feed).get('cache'):
73 parsed_feed = feed_parse(feed, **self.seen[feed]['cache'])
75 if self.seen.get(feed):
76 self.plugin.log.debug('No cache headers set (etag/modified)')
77 parsed_feed = feed_parse(feed)
78 # Cannot resolve address
79 if 'status' not in parsed_feed:
80 self.plugin.log.error('Error from "%s": %s.',
81 feed, parsed_feed.bozo_exception.__repr__())
84 if parsed_feed.status == 304:
85 self.plugin.log.debug('Got 304 not modified')
87 # unusual return http code
88 if parsed_feed.status != 200:
89 self.plugin.log.warning(
90 'Got code %(status)d from "%(href)s" (please update).',
93 if not self.seen.setdefault(feed):
94 # Fills with post id when first started (prevent from posting all
96 self.seen[feed] = {'cache': None}
97 self._update_cache(feed, parsed_feed)
99 title = '"%s":' % parsed_feed.feed.get('title', 'n/a')
100 xtitle = '<strong>%s</strong>:' % html_escape(
101 parsed_feed.feed.get('title', 'n/a'))
106 entries = {p.id for p in parsed_feed.entries}
107 seen_ids = self.seen.get(feed).get('ids')
108 new_entries = [p for p in parsed_feed.entries
109 if p.id in entries - seen_ids]
110 for post in new_entries:
111 self.plugin.log.info(post.title)
112 body = '%(title)s %(link)s' % post
114 xpost = {'title': html_escape(post.get('title', 'n/a'))}
115 xpost['link'] = html_escape(post.get('link',))
116 xbody = '<a href="{link}">{title}</a>'.format(**xpost)
118 # Updating self.seen, entries and cache headers
119 self._update_cache(feed, parsed_feed)
121 self.plugin.send(self.plugin.bot.room,
122 {'mhtml': '<br />'.join(xhtml), 'mbody': '\n'.join(text)},
126 while not self.thread_killed:
127 self.plugin.log.debug('feeds check')
128 for feed in self.feeds_list:
131 except ConnectionError as err: # Non fatal exception
132 self.plugin.log.error('connection error on %s: %s', feed, err)
133 except URLError as err: # Non fatal exception
134 self.plugin.log.error('error for "%s": %s', feed, err.reason)
135 except Exception as err: # Unknown execption, killing thread anyway
136 self.plugin.log.error('feeds thread crashed: %s', err)
137 self.plugin.log.error(''.join(traceback.format_exc()))
138 self.thread_killed = True
139 self.last_check = datetime.datetime.utcnow()
140 for _ in list(range(self.tempo)):
142 if self.thread_killed:
149 Feeds plugin depends on external module: **feedparser**
152 #: Time between feeds check
154 #: Default feeds to monitor
156 'https://www.debian.org/security/dsa',
157 'https://www.debian.org/News/news',
159 'https://tracker.debian.org/pkg/prosody/rss',
160 'https://tracker.debian.org/pkg/ejabberd/rss',
162 'https://planet.debian.org/atom.xml',
165 def __init__(self, bot):
166 Plugin.__init__(self, bot)
167 self.last_check = None
168 self.th_mon = FeedMonitor(self)
172 self.th_mon.thread_killed = True
175 def feeds(self, rcv, args):
176 """Monitors debian project related feeds.
178 * ``!feeds`` : registered feeds list
179 * ``!feeds last`` : last check time"""
181 date = '{:%Y-%m-%d %H:%M} (utc)'.format(self.th_mon.last_check)
182 self.reply(rcv, f'Last feeds check: {date}')
184 html = ['<a href="{0}">{1}</a>'.format(
186 html_escape('{1}{2}'.format(*urlparse(u)))
187 ) for u in Feeds.FEEDS]
188 msg = {'mbody': 'Feeds:\n' + '\n'.join(Feeds.FEEDS),
189 'mhtml': 'Feeds:<br />' + '<br />'.join(html)}