:PROPERTIES:
:ID: 20230125T143144.011175
:ROAM_REFS: https://github.com/edsu/feediverse/
:ROAM_ALIASES: Feediverse
:END:
|
|
#+TITLE: Posting Arcology Feeds to the Fediverse Automatically with Feediverse
|
|
#+filetags: :Project:CCE:Fediverse:
|
|
|
|
#+ARCOLOGY_KEY: arcology/feediverse
|
|
#+ARCOLOGY_ALLOW_CRAWL: t
|
|
#+AUTO_TANGLE: t
|
|
|
|
#+begin_quote
|
|
feediverse will read RSS/Atom feeds and send the messages as Mastodon posts. It's meant to add a little bit of spice to your timeline from other places. Please use it responsibly.
|
|
#+end_quote
|
|
|
|
I was not convinced that [[id:20221227T164309.780458][feed2toot]] was the right way to go about this and in trying to extend it, I found myself frustrated. Well, here's a simpler single-file solution. I extended [[id:1d917282-ecf4-4d4c-ba49-628cbb4bb8cc][The Arcology Project]] to expose a JSON list of feeds and their metadata and with my modified version of =feediverse= I can post all of my sites' toots with one command.
|
|
|
|
* =feediverse.py=
|
|
|
|
This is a lightly modified version of the referenced =feediverse.py= above, with my modifications distributed under the [[id:20220116T143655.499306][Hey Smell This]] license.
|
|
|
|
This thing is, basically, simple to operate. It's driven by a YAML configuration file:
|
|
|
|
#+begin_src yaml :tangle ~/Code/feediverse/config.yml.sample
|
|
# Per-site API credentials, keyed by the "site" value of each feed in the
# index. The aliases stand in for credential mappings (client_id,
# client_secret, access_token); their anchors are not part of this sample.
tokens:
  lionsrear: *lionsrear-creds
  garden: *garden-creds
  cce: *garden-creds
  arcology: *garden-creds

# URL of the JSON feed index that lists every feed and its site.
feeds_index: https://thelionsrear.com/feeds.json
# Status text; the {placeholders} are filled from each parsed feed entry.
post_template: >-
  NEW by @rrix@notes.whatthefuck.computer: {summary}

  {url} {hashtags}
# Timestamp of the newest entry already posted; rewritten after each run.
updated: '2023-01-25T06:13:50.343361+00:00'
# Base URL of the fediverse instance all accounts live on.
url: https://notes.whatthefuck.computer
|
|
#+end_src
|
|
|
|
This file will be created the first time you run this command; in my case it's generated locally and then copied to my [[id:20211120T220054.226284][Wobserver]] in the NixOS declarations below.
|
|
|
|
#+begin_src python :noweb-ref config-load-save
|
|
def save_config(config, config_file):
    """Write ``config`` to ``config_file`` as block-style YAML.

    The mapping is serialized *before* the file is opened for writing, so a
    serialization error propagates without truncating the existing file
    (which also holds the per-site API tokens).

    Args:
        config: Mapping of configuration values to persist.
        config_file: Path of the YAML file to create or overwrite.
    """
    serialized = yaml.dump(dict(config), default_flow_style=False)
    with open(config_file, 'w') as fh:
        fh.write(serialized)
|
|
|
|
def read_config(config_file):
    """Load the YAML configuration stored at ``config_file``.

    Args:
        config_file: Path of the YAML configuration file.

    Returns:
        dict: The file's settings layered over the defaults. ``'updated'``
        is always a timezone-aware ``datetime``; when the file does not set
        it, it defaults to the earliest representable UTC instant so that
        every feed entry counts as new.
    """
    config = {
        'updated': datetime(MINYEAR, 1, 1, 0, 0, 0, 0, timezone.utc)
    }
    with open(config_file) as fh:
        # An empty file loads as None; treat that as "no settings".
        cfg = yaml.safe_load(fh) or {}

    if 'updated' in cfg:
        updated = cfg['updated']
        if updated is None:
            # An explicit null means "unset": fall back to the default.
            del cfg['updated']
        else:
            # An unquoted YAML timestamp is already a datetime/date object;
            # only textual values need parsing.
            if not isinstance(updated, datetime):
                updated = dateutil.parser.parse(str(updated))
            # Feed timestamps are compared against this value, and naive and
            # aware datetimes cannot be compared.
            # NOTE(review): naive values are assumed to be UTC -- confirm.
            if updated.tzinfo is None:
                updated = updated.replace(tzinfo=timezone.utc)
            cfg['updated'] = updated

    config.update(cfg)
    return config
|
|
#+end_src
|
|
|
|
The =/feeds.json= endpoint of the [[id:20220225T175638.482695][Arcology Router]] returns a list of feed objects; here that list is re-keyed into a per-site dictionary and returned:
|
|
|
|
#+begin_src python :noweb-ref fetch-feeds
|
|
def fetch_dynamic_feeds(feeds_url):
    """Fetch the JSON feed index and group its feeds by site.

    Args:
        feeds_url: URL returning a JSON list of feed objects, each carrying
            at least a ``'site'`` key.

    Returns:
        dict: Maps each site name to the list of its feed objects, in the
        order they appear in the index.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an error status.
    """
    # Without a timeout an unresponsive server would block the run forever.
    response = requests.get(feeds_url,
                            headers={"User-Agent": "feediverse 0.0.1"},
                            timeout=30)
    # Fail on HTTP errors here instead of trying to decode an error page.
    response.raise_for_status()

    feeds_by_site = {}
    for feed in response.json():
        feeds_by_site.setdefault(feed['site'], []).append(feed)
    return feeds_by_site
|
|
#+end_src
|
|
|
|
With that loaded, it's possible to just loop over the sites, and then loop over each feed in the site to post new entries from them:
|
|
|
|
#+begin_src python :noweb-ref inner-loop
|
|
# Newest entry timestamp seen during this run; it replaces config['updated']
# once every site has been processed.
newest_post = config['updated']
per_site_feeds = fetch_dynamic_feeds(config['feeds_index'])

for site, feeds in per_site_feeds.items():
    # Every site posts through its own account on the configured instance.
    instance_url = config['url']
    site_tokens = config['tokens'][site]
    masto = Mastodon(
        api_base_url=instance_url,
        feature_set='pleroma',
        client_id=site_tokens['client_id'],
        client_secret=site_tokens['client_secret'],
        access_token=site_tokens['access_token'],
    )

    for feed in feeds:
        if args.verbose:
            print(f"fetching {feed['url']} entries since {config['updated']}")
        for entry in get_feed(feed['url'], config['updated']):
            newest_post = max(newest_post, entry['updated'])
            if args.verbose:
                print(entry)
            if args.dry_run:
                print("trial run, not tooting ", entry["title"][:50])
            else:
                status_text = config['post_template'].format(**entry)
                masto.status_post(status_text,
                                  content_type='text/html',
                                  visibility=feed['visibility'])

# A dry run must leave the configuration file untouched.
if not args.dry_run:
    config['updated'] = newest_post.isoformat()
    save_config(config, config_file)
|
|
#+end_src
|
|
|
|
All the feed-parsing stuff is more or less lifted directly from the original =feediverse=, but modified to just post the HTML directly to [[id:20221202T122135.502628][+Akkoma+ Pleroma]].
|
|
|
|
#+begin_src python :noweb-ref feed-parsing
|
|
def get_feed(feed_url, last_update):
    """Yield the entries of ``feed_url`` as post dictionaries, oldest first.

    Args:
        feed_url: URL of the RSS/Atom feed to parse.
        last_update: Only entries whose ``updated`` timestamp is strictly
            later than this ``datetime`` are yielded; a falsy value yields
            every entry.

    Yields:
        dict: One ``get_entry()`` result per selected entry, in ascending
        order of the entry's ``updated`` timestamp.
    """
    feed = feedparser.parse(feed_url)
    # Parse each timestamp once and use that same value for both filtering
    # and ordering, so the two can never disagree.
    stamped = [(dateutil.parser.parse(e['updated']), e) for e in feed.entries]
    if last_update:
        stamped = [pair for pair in stamped if pair[0] > last_update]
    # Sort on the timestamp alone so ties never compare the entries.
    stamped.sort(key=lambda pair: pair[0])
    for _, entry in stamped:
        yield get_entry(entry)
|
|
|
|
def get_entry(entry):
    """Flatten a parsed feed entry into the dict used to fill the post template.

    Args:
        entry: A parsed feed entry offering ``.get()``, item access and the
            ``id``, ``link`` and ``title`` attributes.

    Returns:
        dict: ``url`` (the entry id), ``link``, ``title`` (HTML stripped),
        ``summary`` (HTML kept), ``content``, ``hashtags`` (space-separated)
        and ``updated`` (a parsed ``datetime``).
    """
    # Spaces in a tag term become underscores; dots and dashes are removed.
    hashtags = ' '.join(
        '#' + tag['term'].replace(' ', '_').replace('.', '').replace('-', '')
        for tag in entry.get('tags', [])
    )
    summary = entry.get('summary', '')
    content = entry.get('content', '') or ''
    url = entry.id

    post = {}
    post['url'] = url
    post['link'] = entry.link
    post['title'] = cleanup(entry.title)
    post['summary'] = cleanup(summary, strip_html=False)
    post['content'] = content
    post['hashtags'] = hashtags
    post['updated'] = dateutil.parser.parse(entry['updated'])
    return post
|
|
|
|
def cleanup(text, strip_html=True):
    """Normalize the whitespace of ``text``, optionally stripping HTML first.

    Args:
        text: The raw title or summary text.
        strip_html: When true, parse ``text`` as HTML and keep only its
            text content before normalizing.

    Returns:
        str: ``text`` with non-breaking spaces turned into spaces, runs of
        spaces collapsed, spaces before a newline removed, single newlines
        between word characters replaced by a space, runs of three or more
        newlines reduced to two, and surrounding whitespace stripped.
    """
    if strip_html:
        text = BeautifulSoup(text, 'html.parser').get_text()
    text = re.sub('\xa0+', ' ', text)
    text = re.sub(' {2,}', ' ', text)
    text = re.sub(' +\n', '\n', text)
    # Lookarounds leave the neighbouring characters unconsumed, so
    # consecutive one-character lines ("a\nb\nc") are all joined.
    text = re.sub(r'(?<=\w)\n(?=\w)', ' ', text)
    text = re.sub(r'\n{3,}', '\n\n', text)
    return text.strip()
|
|
#+end_src
|
|
|
|
Setting up the config file is a bit different than the upstream stuff because my version supports setting up multiple accounts on a single instance. I made the design decision to only support one fedi instance per feedi instance; if you want to run this on multiple fedi servers, you'll need to run it with more than one config file, or just don't.
|
|
|
|
#+begin_src python :noweb-ref setup-config
|
|
def yes_no(question):
    """Ask ``question`` on the terminal and return whether the answer is yes.

    Args:
        question: Prompt text; ``' [y/n] '`` is appended to it.

    Returns:
        bool: True only for ``y``, ``yes`` or ``1`` (case-insensitive,
        surrounding whitespace ignored). Anything else, including an empty
        answer, is treated as "no".
    """
    res = input(question + ' [y/n] ')
    # Compare against whole answers: a substring test would accept "".
    return res.strip().lower() in ('y', 'yes', '1')
|
|
|
|
def setup(config_file):
    """Interactively create the configuration file at ``config_file``.

    Asks for the instance URL and the feed index URL, then registers an
    application and logs in once for every site found in the index, and
    finally saves the collected tokens with ``save_config()``.

    Args:
        config_file: Path the new configuration is written to.
    """
    url = input('What is your Fediverse Instance URL? ')
    feeds_index = input("What is the arcology feed index URL? ")
    tokens = {}
    # Fallback so the saved config still has a name if the index is empty.
    name = "feediverse"
    for site in fetch_dynamic_feeds(feeds_index):
        print(f"Configuring for {site}...")
        print("I'll need a few things in order to get your access token")
        name = input('app name (e.g. feediverse): ') or "feediverse"
        # No scopes are passed, so the library default applies.
        client_id, client_secret = Mastodon.create_app(
            api_base_url=url,
            client_name=name,
            website='https://github.com/edsu/feediverse'
        )
        username = input('mastodon username (email): ')
        # getpass keeps the password from being echoed to the terminal.
        password = getpass.getpass('mastodon password (not stored): ')
        m = Mastodon(client_id=client_id, client_secret=client_secret, api_base_url=url)
        access_token = m.log_in(username, password)

        tokens[site] = {
            'client_id': client_id,
            'client_secret': client_secret,
            'access_token': access_token,
        }

    old_posts = yes_no('Shall already existing entries be tooted, too?')
    config = {
        'name': name,
        'url': url,
        'feeds_index': feeds_index,
        'tokens': tokens,
        'post_template': '{title} {summary} {url}'
    }
    # Without an 'updated' stamp every existing entry counts as new.
    if not old_posts:
        config['updated'] = datetime.now(tz=timezone.utc).isoformat()
    save_config(config, config_file)
    print("")
    print("Your feediverse configuration has been saved to {}".format(config_file))
    print("Add a line like this to your crontab to check every 15 minutes:")
    print("*/15 * * * * /usr/local/bin/feediverse")
    print("")
|
|
#+end_src
|
|
|
|
All of that is assembled together in to a single command which takes a =--dry-run=, =--verbose= and =--config= argument to operate:
|
|
|
|
#+begin_src python :tangle ~/Code/feediverse/feediverse.py :noweb yes :shebang #!/usr/bin/env python3
|
|
# Make sure to edit this in cce/feediverse.org !!!
|
|
|
|
import argparse
import getpass
import os
import re
import sys
from datetime import datetime, timezone, MINYEAR

import dateutil
import dateutil.parser
import feedparser
import requests
import yaml
from bs4 import BeautifulSoup
from mastodon import Mastodon
|
|
|
|
DEFAULT_CONFIG_FILE = os.path.join("~", ".feediverse")
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument("-n", "--dry-run", action="store_true",
|
|
help=("perform a trial run with no changes made: "
|
|
"don't toot, don't save config"))
|
|
parser.add_argument("-v", "--verbose", action="store_true",
|
|
help="be verbose")
|
|
parser.add_argument("-c", "--config",
|
|
help="config file to use",
|
|
default=os.path.expanduser(DEFAULT_CONFIG_FILE))
|
|
|
|
args = parser.parse_args()
|
|
config_file = args.config
|
|
|
|
if args.verbose:
|
|
print("using config file", config_file)
|
|
|
|
if not os.path.isfile(config_file):
|
|
setup(config_file)
|
|
|
|
config = read_config(config_file)
|
|
|
|
<<inner-loop>>
|
|
|
|
<<fetch-feeds>>
|
|
|
|
<<feed-parsing>>
|
|
|
|
<<config-load-save>>
|
|
|
|
<<setup-config>>
|
|
|
|
if __name__ == "__main__":
|
|
main()
|
|
#+end_src
|
|
|
|
* Packaging =feediverse= in [[id:20221021T121120.541960][rixpkgs]]
|
|
|
|
This is pretty easy to get running; if you do this yourself, you'll want to override =src= to point to [[https://code.rix.si/rrix/feediverse]], but I don't like remembering to push my changes 😇
|
|
|
|
#+begin_src nix :tangle ~/arroyo-nix/pkgs/feediverse.nix
|
|
# Package expression for the locally modified feediverse script.
# NOTE(review): `fetchPypi` and `python` are accepted as arguments but are
# not referenced anywhere in this expression.
{ lib,
  buildPythonPackage,
  fetchPypi,
  beautifulsoup4,
  feedparser,
  python-dateutil,
  requests,
  mastodon-py,
  pyyaml,
  python,
}:

buildPythonPackage rec {
  pname = "feediverse";
  version = "0.0.1";

  # Built straight from the local checkout rather than a fetched source;
  # override `src` to build from somewhere else.
  src = /home/rrix/Code/feediverse;

  # Runtime libraries imported by feediverse.py.
  propagatedBuildInputs = [
    beautifulsoup4
    feedparser
    python-dateutil
    requests
    pyyaml
    mastodon-py
  ];

  meta = with lib; {
    homepage = "https://code.rix.si/rrix/feediverse";
    description = "feediverse will read RSS/Atom feeds and send the messages as Mastodon posts.";
    license = licenses.mit;
    maintainers = with maintainers; [ rrix ];
  };
}
|
|
#+end_src
|
|
|
|
* =nix-shell= for developing feediverse
|
|
|
|
Simple enough to get a dev environment running rather than using =venv=...
|
|
|
|
#+begin_src nix :tangle ~/Code/feediverse/shell.nix
|
|
# Development shell: a Python 3 interpreter with the libraries that
# feediverse.py imports.
{ pkgs ? import <nixpkgs> {} }:

let
  myPy = pkgs.python3.withPackages (ps: with ps; [
    beautifulsoup4
    feedparser
    python-dateutil
    requests
    pyyaml
    mastodon-py
  ]);
in myPy.env
|
|
#+end_src
|
|
|
|
* Running =feediverse= on [[id:20211120T220054.226284][The Wobserver]]
|
|
|
|
Okay, with the configuration file generated and then copied on to the server (since it's mutated by the script...), I shove it in to the [[id:20221021T150631.404359][Arroyo Nix]] index and then set up an [[id:arroyo/nixos][Arroyo NixOS]] module to set up a service account and run it with a SystemD timer. This will be pretty straightforward if you've seen NixOS before.
|
|
|
|
#+ARROYO_NIXOS_MODULE: nixos/feediverse.nix
|
|
#+ARROYO_SYSTEM_ROLE: server
|
|
|
|
#+begin_src nix :tangle ~/arroyo-nix/nixos/feediverse.nix
|
|
# NixOS module: runs feediverse under a dedicated system user, started by a
# systemd timer.
{ pkgs, lib, config, ... }:

{
  # Fixed numeric ids for the service account and its group.
  ids.uids.feediverse = 902;
  ids.gids.bots = 902;

  users.groups.bots = {
    gid = config.ids.gids.bots;
  };

  users.users.feediverse = {
    # The service reads its configuration file from this directory.
    home = "/srv/feediverse";
    group = "bots";
    uid = config.ids.uids.feediverse;
    isSystemUser = true;
  };

  systemd.services.feediverse = {
    description = "Feeds to Toots";
    # Ordered after the fediverse server the posts are sent to.
    after = ["pleroma.service"];
    wantedBy = ["default.target"];
    # feediverse rewrites the file passed with -c after each run, so it
    # must be writable by the service user.
    script =
      ''
        ${pkgs.feediverse}/bin/feediverse -c ${config.users.users.feediverse.home}/feediverse.yml
      '';
    serviceConfig = {
      User = "feediverse";
      WorkingDirectory = config.users.users.feediverse.home;
    };
  };
  systemd.timers.feediverse = {
    description = "Start feediverse on the quarter-hour";
    # NOTE(review): both triggers look relative (to boot and to the last
    # activation of the service), not wall-clock quarter-hours -- confirm
    # against systemd.timer(5).
    timerConfig = {
      OnUnitActiveSec = "15 minutes";
      OnStartupSec = "15 minutes";
    };
    # NOTE(review): timer units are conventionally wanted by
    # timers.target -- confirm default.target is intended.
    wantedBy = [ "default.target" ];
  };
}
|
|
#+end_src
|
|
|
|
* NEXT Consider making the config file immutable by storing the update timestamp in a different location
|