matrix-feedbot/feedbot.py

152 lines
5.4 KiB
Python

from matrix_client.client import MatrixClient
import feedparser
from jinja2 import Template
import argparse
import yaml
import urllib2
from PIL import Image
import sys
import os
from sets import Set
import time
import re
# Captures the URL of an <img> tag in entry HTML. Only tags written exactly as
# `<img src="...` are recognised (src must be the first attribute and must be
# double-quoted); group 1 is the text up to the closing quote.
IMAGE_MATCH_RE = re.compile(r'<img src="([^"]*)')
class Feedbot(object):
    """Poll RSS/Atom feeds and post their new entries to Matrix rooms.

    Each feed is a mapping with a ``url`` and an optional ``rooms`` list;
    feeds without ``rooms`` post to ``config['default_room']``. For every
    feed URL a "watermark" (the ``time.mktime`` value of the last posted
    entry) is kept in ``<data_path>/watermarks.yaml`` so that entries are not
    posted again on later polls or after a restart.
    """

    # Seconds to wait between polling passes over all feeds.
    POLL_INTERVAL_SECONDS = 120
    # Seconds to wait after joining each room.
    JOIN_DELAY_SECONDS = 3
    # Seconds to wait after posting one entry to one room.
    SEND_DELAY_SECONDS = 3
    # PIL format name -> MIME type; any other format is sent as image/jpeg.
    _MIMETYPES = {"GIF": "image/gif", "PNG": "image/png"}
    _DEFAULT_MIMETYPE = "image/jpeg"

    def __init__(self, config):
        """Create the Matrix client and load the template and watermarks.

        Args:
            config: Configuration mapping. Keys used by this class:
                ``feeds``, ``default_room``, ``homeserver_uri``, ``username``,
                ``password``, ``data_path`` and, optionally,
                ``validate_certificate`` (defaults to False).

        Raises:
            KeyError: If a required configuration key is missing.
            IOError: If ``<data_path>/template.j2`` cannot be read.
        """
        self.config = config
        self.feeds = config['feeds']
        self.room_ids = set([config['default_room']])
        for feed in self.feeds:
            self.room_ids.update(self._rooms_for(feed))
        self.client = MatrixClient(config['homeserver_uri'])
        # NOTE(security): TLS certificate verification is off unless the
        # config sets `validate_certificate: true`. Off is the long-standing
        # default and is kept so existing deployments keep working, but it
        # allows man-in-the-middle attacks against the homeserver connection.
        self.client.api.validate_certificate(
            bool(config.get('validate_certificate', False)))
        self.rooms = {}
        with open(os.path.join(config['data_path'], 'template.j2')) as f:
            self.template = Template(f.read())
        self.watermarks_path = os.path.join(
            config['data_path'], 'watermarks.yaml')
        self.watermarks = {}
        if os.path.isfile(self.watermarks_path):
            with open(self.watermarks_path) as f:
                # An empty file parses to None; treat it as "no watermarks".
                self.watermarks = yaml.safe_load(f.read()) or {}

    def _rooms_for(self, feed):
        """Return the room ids a feed posts to (its own, or the default)."""
        return feed.get('rooms') or [self.config['default_room']]

    def connect_and_join(self):
        """Log in with the configured credentials and join every room.

        The login token is stored in ``self.token`` and the joined room
        objects in ``self.rooms``, keyed by room id.
        """
        self.token = self.client.login_with_password(
            username=self.config['username'],
            password=self.config['password'])
        for room_id in self.room_ids:
            self.rooms[room_id] = self.client.join_room(room_id)
            time.sleep(self.JOIN_DELAY_SECONDS)
        print("Logged in, joined %s" % self.rooms)

    def loop(self):
        """Poll all feeds forever, pausing between passes.

        A failure in one feed is logged and does not stop the other feeds
        or the loop itself. This method never returns.
        """
        while True:
            for feed in self.feeds:
                try:
                    self.update_feed(feed)
                except Exception as e:
                    print("Failed to update %s: %r" % (feed.get('url'), e))
            time.sleep(self.POLL_INTERVAL_SECONDS)

    def update_feed(self, feed):
        """Fetch one feed and post every entry newer than its watermark.

        Args:
            feed: Feed mapping with ``url`` and optional ``rooms``.

        Entries without any parsed date are skipped, because they cannot be
        compared against the watermark. Image upload and per-room delivery
        are best-effort: failures are logged and the entry is still marked
        as handled.
        """
        url = feed['url']
        fetched = feedparser.parse(url, agent="curl/7.53.1")
        print("Fetched %s" % url)
        chan_title = fetched.feed.get('title') or url
        rooms = self._rooms_for(feed)
        # Walk the entries in reverse of the fetched order (presumably
        # oldest first, as feeds usually list newest first) so the stored
        # watermark ends up at the last entry processed.
        for entry in reversed(fetched.entries):
            link = entry.get('link') or ""
            dt = entry.get('updated_parsed') or entry.get('published_parsed')
            if not dt:
                print("Skipping undated entry %s" % link)
                continue
            # NOTE(review): feedparser documents *_parsed values as UTC while
            # time.mktime interprets its argument as local time. The same
            # conversion is used when storing the watermark, so comparisons
            # are self-consistent; it is left as is to stay compatible with
            # watermarks already on disk.
            watermark = self.watermarks.get(url)
            if watermark and time.mktime(dt) <= watermark:
                continue

            content = ""
            if entry.get('content'):
                content = entry.content[0]['value']
            elif entry.get('summary_detail'):
                content = entry.summary_detail['value']
            title = entry.get('title') or ""

            # A broken image must not block the feed: post text-only instead.
            try:
                image_mxid, image_info = \
                    self.maybe_extract_and_upload_image(content)
            except Exception as e:
                print("Could not attach image for %s: %r" % (link, e))
                image_mxid, image_info = None, None

            string = self.template.render(
                chan_title=chan_title,
                date=time.strftime("%Y-%m-%d %H:%MZ", dt),
                content=content,
                title=title,
                link=link
            )
            print("Posting %s" % link)
            for room_id in rooms:
                try:
                    self.send_to_room(room_id, string, image_mxid, image_info)
                except Exception as e:
                    # Best effort per room, but say what went wrong.
                    print("Failed to post %s to %s: %r" % (link, room_id, e))
            self.update_watermark(feed, dt)

    def maybe_extract_and_upload_image(self, content):
        """Upload the first ``<img src="...">`` found in ``content``, if any.

        Args:
            content: HTML content of a feed entry (may be empty).

        Returns:
            ``(content_uri, info)`` where ``info`` holds ``h``, ``w`` and
            ``mimetype`` for the uploaded image, or ``(None, None)`` when
            ``content`` contains no usable image URL.

        Raises:
            Whatever the download, PIL or the Matrix upload raise on failure.
        """
        import io  # local import: `io` is not in the file's import block

        if not content:
            return None, None
        # search(), not match(): the image may appear anywhere in the HTML.
        md = IMAGE_MATCH_RE.search(content)
        if not md or not md.group(1):
            return None, None

        # Keep the image in memory rather than in a fixed, shared temp file.
        response = urllib2.urlopen(md.group(1))
        try:
            data = response.read()
        finally:
            response.close()

        img = Image.open(io.BytesIO(data))
        mimetype = self._MIMETYPES.get(img.format, self._DEFAULT_MIMETYPE)
        resp = self.client.api.upload_media(mimetype, data)
        print(resp)
        info = {'h': img.height, 'w': img.width, 'mimetype': mimetype}
        print(info)
        return resp.get('content_uri'), info

    def send_to_room(self, room_id, string, image_mxid, image_info):
        """Send the rendered entry, then its image if there is one.

        Args:
            room_id: Matrix room id to post to.
            string: Rendered HTML, used both as body and formatted body.
            image_mxid: Content URI of an uploaded image, or None.
            image_info: Image ``info`` mapping sent with the image event.
        """
        self.client.api.send_message_event(room_id, 'm.room.message', {
            "body": string,
            "format": "org.matrix.custom.html",
            "formatted_body": string,
            "msgtype": "m.text"
        })
        if image_mxid:
            self.client.api.send_message_event(room_id, 'm.room.message', {
                "body": image_mxid,
                "msgtype": "m.image",
                "url": image_mxid,
                "info": image_info
            })
        time.sleep(self.SEND_DELAY_SECONDS)

    def update_watermark(self, feed, dt):
        """Record ``dt`` as the feed's watermark and rewrite the YAML file.

        Args:
            feed: Feed mapping; its ``url`` is the watermark key.
            dt: ``time.struct_time`` of the entry that was just handled.
        """
        self.watermarks[feed['url']] = time.mktime(dt)
        with open(self.watermarks_path, 'w') as f:
            f.write(yaml.dump(self.watermarks))
def main():
    """Parse the command line, load the configuration and run the bot.

    Exits with status 1 when the configuration file or data directory is
    missing, or when the configuration file does not contain a mapping.
    Otherwise it logs in, joins the rooms and polls forever.
    """
    parser = argparse.ArgumentParser(description="RSS to Matrix")
    # Both paths are mandatory: without them os.path.exists(None) would
    # raise a TypeError instead of giving a usable error message.
    parser.add_argument('--config_path', "-c", required=True,
                        help="path to the YAML configuration file")
    parser.add_argument('--data_path', "-d", required=True,
                        help="directory holding template.j2 and "
                             "watermarks.yaml")
    args = parser.parse_args()

    if not os.path.exists(args.config_path):
        print("Configuration file doesn't exist, please create one")
        sys.exit(1)
    if not os.path.exists(args.data_path):
        print("Data directory doesn't exist, please create one")
        sys.exit(1)

    with open(args.config_path) as f:
        conf = yaml.safe_load(f)
    # An empty file parses to None; anything but a mapping is unusable.
    if not isinstance(conf, dict):
        print("Configuration file is empty or not a mapping")
        sys.exit(1)

    conf['data_path'] = args.data_path
    feedbot = Feedbot(conf)
    feedbot.connect_and_join()
    feedbot.loop()


if __name__ == "__main__":
    main()