#!/usr/bin/env python3
# Make sure to edit this in cce/feediverse.org !!!
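#
# feediverse for the arcology: poll a JSON "feed index", fetch each RSS/Atom
# feed listed there, and toot entries newer than the last recorded run to a
# Mastodon-compatible instance, using a separate app/token per site.
# State and credentials live in a YAML config file (~/.feediverse by default);
# if that file is missing, an interactive setup() run creates it.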

import os
import re
import sys
import yaml
import argparse
import dateutil.parser  # imported explicitly: dateutil.parser.parse() is used below
import feedparser

import requests

from bs4 import BeautifulSoup
from mastodon import Mastodon
from datetime import datetime, timezone, MINYEAR

DEFAULT_CONFIG_FILE = os.path.join("~", ".feediverse")

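# Configuration shapes (field names come from the code below; the values here
# are only illustrative):
#
#   Local YAML config (~/.feediverse), written by setup()/save_config():
#     name: feediverse
#     url: https://mastodon.example             # your instance
#     feeds_index: https://example.org/feeds.json
#     post_template: '{title} {summary} {url}'
#     updated: '2023-01-01T00:00:00+00:00'      # newest entry seen so far
#     tokens:
#       some-site:
#         client_id: ...
#         client_secret: ...
#         access_token: ...
#
#   Feed index (the JSON document at feeds_index): a list of feed definitions,
#   each carrying at least 'site', 'url', 'post_template', and 'visibility', e.g.
#     [{"site": "some-site", "url": "https://example.org/feed.xml",
#       "post_template": "{title} {url}", "visibility": "unlisted"}]

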
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-n", "--dry-run", action="store_true",
                        help=("perform a trial run with no changes made: "
                              "don't toot, don't save config"))
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="be verbose")
    parser.add_argument("-c", "--config",
                        help="config file to use",
                        default=os.path.expanduser(DEFAULT_CONFIG_FILE))

    args = parser.parse_args()
    config_file = args.config

    if args.verbose:
        print("using config file", config_file)

    # First run: no config file yet, so walk through interactive setup.
    if not os.path.isfile(config_file):
        setup(config_file)

    config = read_config(config_file)

    newest_post = config['updated']
    per_site_feeds = fetch_dynamic_feeds(config['feeds_index'])

    for site, feeds in per_site_feeds.items():
        # One client per site, using that site's stored credentials.
        masto = Mastodon(
            api_base_url=config['url'],
            client_id=config['tokens'][site]['client_id'],
            client_secret=config['tokens'][site]['client_secret'],
            access_token=config['tokens'][site]['access_token']
        )

        for feed in feeds:
            if args.verbose:
                print(f"fetching {feed['url']} entries since {config['updated']}")
            for entry in get_feed(feed['url'], config['updated']):
                newest_post = max(newest_post, entry['updated'])
                if args.verbose:
                    print(entry)
                if args.dry_run:
                    print("trial run, not tooting ", entry["title"][:50])
                    continue
                masto.status_post(feed['post_template'].format(**entry),
                                  content_type='text/html',
                                  visibility=feed['visibility'])

    if not args.dry_run:
        # Remember the newest entry seen so the next run only toots newer ones.
        config['updated'] = newest_post.isoformat()
        save_config(config, config_file)


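# fetch_dynamic_feeds: download the feed index JSON and group the feed
# definitions by their 'site' key, so main() can build one Mastodon client
# (with that site's tokens) per site rather than per feed.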
def fetch_dynamic_feeds(feeds_url):
    feeds = requests.get(feeds_url).json()

    feeds_by_site = dict()
    for feed in feeds:
        feeds_by_site[feed['site']] = feeds_by_site.get(feed['site'], []) + [feed]
    return feeds_by_site


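# get_feed: parse one feed and yield its entries oldest-first, skipping
# anything whose 'updated' timestamp is not newer than last_update.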
def get_feed(feed_url, last_update):
    feed = feedparser.parse(feed_url)
    if last_update:
        entries = [e for e in feed.entries
                   if dateutil.parser.parse(e['updated']) > last_update]
    else:
        entries = feed.entries
    entries.sort(key=lambda e: e.updated_parsed)
    for entry in entries:
        yield get_entry(entry)


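# get_entry: normalize a feedparser entry into the dict that gets formatted
# into a status via feed['post_template'].format(**entry). Template fields:
# {url} {link} {title} {summary} {content} {hashtags} {updated}. With the
# default template '{title} {summary} {url}', a toot would look roughly like
# "Some title  Some summary  https://example.org/entry" (illustrative values).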
def get_entry(entry):
    hashtags = []
    for tag in entry.get('tags', []):
        t = tag['term'].replace(' ', '_').replace('.', '').replace('-', '')
        hashtags.append('#{}'.format(t))
    summary = entry.get('summary', '')
    content = entry.get('content', '') or ''
    url = entry.id
    return {
        'url': url,
        'link': entry.link,
        'title': cleanup(entry.title),
        'summary': summary,
        'content': content,
        'hashtags': ' '.join(hashtags),
        'updated': dateutil.parser.parse(entry['updated'])
    }


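# cleanup: strip HTML from a title with BeautifulSoup and collapse stray
# whitespace (non-breaking spaces, runs of spaces, mid-sentence line breaks,
# excess blank lines).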
def cleanup(text):
    html = BeautifulSoup(text, 'html.parser')
    text = html.get_text()
    text = re.sub('\xa0+', ' ', text)
    text = re.sub(' +', ' ', text)
    text = re.sub(' +\n', '\n', text)
    text = re.sub(r'(\w)\n(\w)', r'\1 \2', text)
    text = re.sub('\n\n\n+', '\n\n', text, flags=re.M)
    return text.strip()


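# save_config / read_config: round-trip the YAML state file. read_config
# seeds 'updated' with the earliest representable UTC datetime, so a config
# without that key means "every feed entry counts as new".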
def save_config(config, config_file):
    copy = dict(config)
    with open(config_file, 'w') as fh:
        fh.write(yaml.dump(copy, default_flow_style=False))


def read_config(config_file):
    config = {
        'updated': datetime(MINYEAR, 1, 1, 0, 0, 0, 0, timezone.utc)
    }
    with open(config_file) as fh:
        cfg = yaml.load(fh, yaml.SafeLoader)
        if 'updated' in cfg:
            cfg['updated'] = dateutil.parser.parse(cfg['updated'])
    config.update(cfg)
    return config


def yes_no(question):
    res = input(question + ' [y/n] ')
    # Treat "y", "yes", or "1" as an affirmative answer; anything else is a no.
    return res.strip().lower() in ('y', 'yes', '1')


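# setup: interactive first run. For each site in the feed index, register an
# app on the instance and log in once to obtain an access token (the password
# itself is not stored), then write the tokens and a default post template to
# the config file.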
def setup(config_file):
    url = input('What is your Fediverse Instance URL? ')
    feeds_index = input("What is the arcology feed index URL? ")
    tokens = dict()
    for site in fetch_dynamic_feeds(feeds_index).keys():
        print(f"Configuring for {site}...")
        print("I'll need a few things in order to get your access token")
        name = input('app name (e.g. feediverse): ') or "feediverse"
        client_id, client_secret = Mastodon.create_app(
            api_base_url=url,
            client_name=name,
            # scopes=['read', 'write'],
            website='https://github.com/edsu/feediverse'
        )
        username = input('mastodon username (email): ')
        password = input('mastodon password (not stored): ')
        m = Mastodon(client_id=client_id, client_secret=client_secret, api_base_url=url)
        access_token = m.log_in(username, password)

        tokens[site] = {
            'client_id': client_id,
            'client_secret': client_secret,
            'access_token': access_token,
        }

    old_posts = yes_no('Shall already existing entries be tooted, too?')
    config = {
        'name': name,
        'url': url,
        'feeds_index': feeds_index,
        'tokens': tokens,
        'post_template': '{title} {summary} {url}'
    }
    if not old_posts:
        # Start from "now" so pre-existing feed entries are never tooted.
        config['updated'] = datetime.now(tz=timezone.utc).isoformat()
    save_config(config, config_file)
    print("")
    print("Your feediverse configuration has been saved to {}".format(config_file))
    print("Add a line like this to your crontab to check every 15 minutes:")
    print("*/15 * * * * /usr/local/bin/feediverse")
    print("")


if __name__ == "__main__":
    main()