152 lines
5.4 KiB
Python
152 lines
5.4 KiB
Python
import argparse
import io
import os
import re
import sys
import time
import urllib2
from sets import Set

import feedparser
import yaml
from jinja2 import Template
from matrix_client.client import MatrixClient
from PIL import Image
|
|
|
|
# Captures the URL of an <img> tag written exactly as `<img src="...`.
IMAGE_MATCH_RE = re.compile(r'<img src="([^"]*)')


class Feedbot(object):
    """Relay new feed entries into Matrix rooms.

    ``config`` is a mapping providing:

    * ``feeds``: list of mappings, each with a ``url`` and an optional
      ``rooms`` list (falls back to ``default_room`` when absent/empty).
    * ``default_room``, ``homeserver_uri``, ``username``, ``password``.
    * ``data_path``: directory holding ``template.j2`` (the Jinja2 message
      template) and ``watermarks.yaml`` (per-feed timestamp of the last
      entry that was posted).
    * ``validate_certificate`` (optional, default ``False``): whether the
      homeserver's TLS certificate is verified.
    """

    # Pillow format name -> MIME type; every other format is sent as JPEG.
    _MIMETYPES = {"GIF": "image/gif", "PNG": "image/png"}

    def __init__(self, config):
        """Read the template and stored watermarks; create the client.

        Raises KeyError for a missing mandatory config key and
        IOError/OSError when ``template.j2`` cannot be read.
        """
        self.config = config
        self.feeds = config['feeds']

        # Every room that must be joined: the default one plus all rooms
        # named by individual feeds.
        self.room_ids = set([config['default_room']])
        for feed in self.feeds:
            self.room_ids.update(self._rooms_for(feed))

        self.client = MatrixClient(config['homeserver_uri'])
        # NOTE(security): certificate verification has always been switched
        # off here, which allows man-in-the-middle attacks. The default is
        # kept for backward compatibility; set `validate_certificate: true`
        # in the config to enable verification.
        self.client.api.validate_certificate(
            bool(config.get('validate_certificate', False)))
        self.rooms = {}

        template_path = os.path.join(self.config['data_path'], 'template.j2')
        with open(template_path) as f:
            self.template = Template(f.read())

        self.watermarks_path = os.path.join(
            self.config['data_path'], 'watermarks.yaml')
        self.watermarks = {}
        if os.path.isfile(self.watermarks_path):
            with open(self.watermarks_path) as f:
                # An empty file loads as None; treat it as "no watermarks".
                self.watermarks = yaml.safe_load(f) or {}

    def _rooms_for(self, feed):
        """Return the room ids *feed* posts to (default room if none set)."""
        return feed.get('rooms') or [self.config['default_room']]

    @staticmethod
    def _entry_content(entry):
        """Return the entry's full content, else its summary, else ''."""
        if entry.get('content'):
            return entry.get('content')[0]['value']
        if entry.get('summary_detail'):
            return entry.get('summary_detail')['value']
        return ""

    @staticmethod
    def _find_image_url(content):
        """Return the first ``<img src="...">`` URL in *content*, or None."""
        # search(), not match(): the image does not have to be the very
        # first thing in the entry body.
        found = IMAGE_MATCH_RE.search(content)
        return found.group(1) if found else None

    def connect_and_join(self):
        """Log in with the configured credentials and join every room."""
        self.token = self.client.login_with_password(
            username=self.config['username'],
            password=self.config['password'])
        for room_id in self.room_ids:
            self.rooms[room_id] = self.client.join_room(room_id)
            # Pause between joins (presumably to stay under rate limits).
            time.sleep(3)
        print("Logged in, joined %s" % self.rooms)

    def loop(self):
        """Poll all feeds forever, sleeping 120 seconds between rounds.

        A failure in one feed is reported and does not stop the others.
        """
        while True:
            for feed in self.feeds:
                try:
                    self.update_feed(feed)
                except Exception as e:
                    print("Failed to update %s: %s" % (feed.get('url'), e))
            time.sleep(120)

    def update_feed(self, feed):
        """Fetch *feed* and post each entry newer than its watermark.

        Entries are walked in reverse feed order. After an entry has been
        offered to all of the feed's rooms the watermark is moved to that
        entry's timestamp and persisted.
        """
        url = feed['url']
        fetched = feedparser.parse(url, agent="curl/7.53.1")
        print("Fetched %s" % url)
        chan_title = fetched.feed.get('title') or url
        rooms = self._rooms_for(feed)

        for entry in reversed(fetched.entries):
            dt = entry.get('updated_parsed') or entry.get('published_parsed')
            if dt is None:
                # Without a timestamp the entry cannot be compared against
                # the watermark; skip it instead of aborting the feed.
                print("Skipping undated entry %s" % (entry.get('link') or ""))
                continue

            watermark = self.watermarks.get(url)
            if watermark and time.mktime(dt) <= watermark:
                continue  # already posted

            content = self._entry_content(entry)
            title = entry.get('title') or ""
            link = entry.get('link') or ""

            try:
                image_mxid, image_info = \
                    self.maybe_extract_and_upload_image(content)
            except Exception as e:
                # A broken or unreachable image must not block the text
                # post (and with it the watermark) for this entry.
                print("Could not relay image for %s: %s" % (link, e))
                image_mxid, image_info = None, None

            string = self.template.render(
                chan_title=chan_title,
                date=time.strftime("%Y-%m-%d %H:%MZ", dt),
                content=content,
                title=title,
                link=link
            )
            print("Posting %s" % link)
            for room_id in rooms:
                try:
                    self.send_to_room(room_id, string, image_mxid, image_info)
                except Exception as e:
                    # Best effort: one failing room must not stop the rest.
                    print("Failed to post %s to %s: %s" % (link, room_id, e))
            self.update_watermark(feed, dt)

    def maybe_extract_and_upload_image(self, content):
        """Upload the first image referenced in *content*, if there is one.

        Returns ``(content_uri, info)`` where ``info`` holds the image's
        ``h``, ``w`` and ``mimetype``; returns ``(None, None)`` when
        *content* references no image. Download, decode and upload errors
        propagate to the caller.
        """
        url = self._find_image_url(content)
        if not url:
            return None, None

        response = urllib2.urlopen(url)
        try:
            data = response.read()
        finally:
            response.close()

        # Decode from memory rather than via a fixed, shared temp file.
        img = Image.open(io.BytesIO(data))
        mimetype = self._MIMETYPES.get(img.format, "image/jpeg")

        resp = self.client.api.upload_media(mimetype, data)
        print(resp)
        info = {'h': img.height, 'w': img.width, 'mimetype': mimetype}
        print(info)
        return resp.get('content_uri'), info

    def send_to_room(self, room_id, string, image_mxid, image_info):
        """Send *string* as an HTML message, then the image if one is given.

        Sleeps 3 seconds afterwards to space out consecutive sends.
        """
        self.client.api.send_message_event(room_id, 'm.room.message', {
            "body": string,
            "format": "org.matrix.custom.html",
            "formatted_body": string,
            "msgtype": "m.text"
        })
        if image_mxid:
            self.client.api.send_message_event(room_id, 'm.room.message', {
                "body": image_mxid,
                "msgtype": "m.image",
                "url": image_mxid,
                "info": image_info
            })
        time.sleep(3)

    def update_watermark(self, feed, dt):
        """Record *dt* (a time.struct_time) as the feed's watermark on disk."""
        self.watermarks[feed['url']] = time.mktime(dt)
        with open(self.watermarks_path, 'w') as f:
            # safe_dump mirrors the safe_load used when reading the file.
            yaml.safe_dump(self.watermarks, f)
|
|
|
|
# Script entry point: parse the command line, load the YAML configuration
# and run the bot until interrupted.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="RSS to Matrix")
    # Both options are mandatory; marking them required yields a usage
    # error instead of a TypeError from the path checks on None.
    parser.add_argument('--config_path', "-c", required=True,
                        help="path to the YAML configuration file")
    parser.add_argument('--data_path', "-d", required=True,
                        help="directory holding template.j2 and "
                             "watermarks.yaml")
    args = parser.parse_args()

    if not os.path.isfile(args.config_path):
        print("Configuration file doesn't exist, please create one")
        sys.exit(1)
    if not os.path.isdir(args.data_path):
        print("Data directory doesn't exist, please create one")
        sys.exit(1)

    with open(args.config_path) as f:
        conf = yaml.safe_load(f)
    if not isinstance(conf, dict):
        # An empty file loads as None; anything but a mapping is unusable.
        print("Configuration file is empty or not a mapping")
        sys.exit(1)
    # The data directory comes from the command line, not the config file.
    conf['data_path'] = args.data_path

    feedbot = Feedbot(conf)
    feedbot.connect_and_join()
    feedbot.loop()
|