44 KiB
The Arcology's Data Models and Web Server
- Data Models for Sites, Web Features, and Feeds
- The Web Server
- NEXT Testing
Data Models for Sites, Web Features, and Feeds
from __future__ import annotations
from typing import Optional, List
from django.db import models
from django.conf import settings
from django_prometheus.models import ExportModelOperationsMixin as EMOM
import arrow
import arroyo.arroyo_rs as native
from arcology.cache_decorator import cache
import roam.models
import logging
logger = logging.getLogger(__name__)
logger.setLevel(logging.WARN)
# used for some memoization
class hashabledict(dict):
    """A dict usable as a memoization cache key.

    The hash is order-insensitive: it is computed over the sorted
    (key, value) pairs, so two equal dicts always hash equally. Keys must
    be mutually sortable and values hashable.
    """

    def __hash__(self):
        pairs = tuple(sorted(self.items()))
        return hash(pairs)
Site
A Site
has many SiteDomain
's. It has a routing key, and a title, and some CSS and customization. There are a few helper classmethods to take an input request or routing key and output a Site object based on the SiteDomain or whatnot. I'm not sure I want the async
definitions to stick around, there needs to be some consideration of what should be async
in this system and where asgi can be relied on for concurrency.
Sites are created in the Arcology Seed Command.
# Sites and SiteDomains are created in django-admin or a seed rather than from arroyo parser, no create_from_arroyo..!
class Site(EMOM('site'), models.Model):
    """A published site: a routing key, a display title, and styling.

    A Site has many SiteDomains. Rows are created by the seed command or
    django-admin, never from the arroyo parser.
    """

    # Routing-key prefix, e.g. "garden"; Page/Feed route_keys start with it.
    key = models.CharField(max_length=512, primary_key=True)
    title = models.CharField(max_length=512)
    # add choices
    # NOTE(review): default=None on a non-null CharField fails at save time
    # when the value is omitted; default="" would be safer (needs migration).
    css_file = models.CharField(max_length=512, blank=True, default=None)
    # this is used in sitemap, and maybe links..
    # presumably a "#RRGGBB" string (site_css appends alpha digits) — TODO confirm
    link_color = models.CharField(max_length=8, blank=True, default=None)

    def _urlize_route_key(self, route_key: str) -> str:
        """Absolute URL on this site's first domain for a full route key.

        Strips the "<site-key>/" prefix. Assumes at least one SiteDomain
        exists — first() would otherwise return None and raise here.
        """
        domain = self.sitedomain_set.first().domain
        key_rest = route_key.split("/", 1)[1]
        return f"https://{domain}/{key_rest}"

    def urlize_page(self, page: Page, heading: Optional[roam.models.Heading] = None) -> str:
        """Absolute URL for a Page, optionally anchored at a heading's node id."""
        url = self._urlize_route_key(page.route_key)
        if heading is not None:
            url = url + f"#{heading.node_id}"
        return url

    def urlize_feed(self, feed: Feed) -> str:
        """Absolute URL for a Feed's XML document."""
        return self._urlize_route_key(feed.route_key)

    @classmethod
    def from_route(cls, route_key: str) -> Site:
        """Look up the Site owning a full route key like "garden/index".

        Raises Site.DoesNotExist when the prefix matches no site. (The old
        `assert site is not None` was unreachable: get() raises, never
        returns None.)
        """
        site_key = route_key.split("/")[0]
        return cls.objects.get(key=site_key)

    @classmethod
    def from_request(cls, request) -> Site:
        """Resolve the Site serving this request from its Host header."""
        host = request.headers.get("Host")
        site = cls.objects.filter(sitedomain__domain=host).first()
        assert site is not None  # no SiteDomain matched the Host header
        return site
class SiteDomain(EMOM('site_domain'), models.Model):
    """A hostname that routes incoming requests to a Site."""

    site = models.ForeignKey(
        Site,
        on_delete=models.CASCADE,
    )
    # e.g. "arcology.garden"; matched against the request's Host header.
    domain = models.CharField(max_length=512)
Base migration
# Schema for Site: natural primary key plus display/styling columns.
migrations.CreateModel(
    name="Site",
    fields=[
        (
            "key",
            models.CharField(max_length=512, primary_key=True, serialize=False),
        ),
        ("title", models.CharField(max_length=512)),
        (
            "css_file",
            models.CharField(blank=True, default=None, max_length=512),
        ),
        (
            "link_color",
            models.CharField(blank=True, default=None, max_length=8),
        ),
    ],
),
# Schema for SiteDomain: hostnames that route requests to a Site.
migrations.CreateModel(
    name="SiteDomain",
    fields=[
        (
            "id",
            models.BigAutoField(
                auto_created=True,
                primary_key=True,
                serialize=False,
                verbose_name="ID",
            ),
        ),
        (
            "site",
            models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE, to="arcology.site"
            ),
        ),
        ("domain", models.CharField(default="localhost", max_length=512)),
    ],
),
Page
A site has many pages. Pages have a routing key defined by the ARCOLOGY_KEY
keyword, a title based on the level-0 heading, and some metadata besides that.
These are created using the create_from_arroyo">create_from_arroyo
pattern which makes it easy for the Arcology ingest_files Command to include new functionality in to the system.
class Page(EMOM('page'), models.Model):
    """A renderable Org page belonging to a Site.

    The route key comes from the ARCOLOGY_KEY keyword and the title from
    the file's level-0 heading. Rows are made by create_from_arroyo during
    the ingest_files command.
    """

    file = models.ForeignKey(
        roam.models.File,
        on_delete=models.CASCADE,
    )
    route_key = models.CharField(max_length=512, primary_key=True)
    root_heading = models.ForeignKey(roam.models.Heading, on_delete=models.CASCADE)
    site = models.ForeignKey(
        Site,
        on_delete=models.CASCADE,
    )
    title = models.CharField(max_length=512)
    allow_crawl = models.BooleanField(default=False)

    def to_url(self) -> str:
        """Absolute URL for this page on its site's first domain."""
        return self.site.urlize_page(self)

    def to_url_path(self) -> str:
        """Site-relative path: the route key minus the site prefix."""
        key_rest = self.route_key.split("/", 1)[1]
        return f"/{key_rest}"

    def collect_keywords(self):
        """Related manager for the file's Org keywords (#+KEY: value)."""
        return self.file.keyword_set

    def collect_tags(self) -> list:
        """All tags on all headings of the backing file."""
        return [
            tag
            for heading in self.file.heading_set.all()
            for tag in heading.tag_set.all()
        ]

    def collect_references(self) -> list:
        """All external references on all headings of the backing file."""
        return [
            reference
            for heading in self.file.heading_set.all()
            for reference in heading.reference_set.all()
        ]

    def collect_links(self) -> dict:
        """Map node-id -> URL for this page's headings and its outbound links.

        Outbound links whose destination heading no longer exists are
        skipped (and logged).
        """
        my_headings = self.file.heading_set.all()
        link_objs = self.file.outbound_links.all()
        ret = {h.node_id: h.to_url() for h in my_headings}
        for el in link_objs:
            try:
                h = el.dest_heading
                url = h.to_url()
                ret[h.node_id] = url
                logger.info(f"link {url} from {el}")
            except roam.models.Heading.DoesNotExist:
                logger.info(f"{el} does not have dest")
        return ret

    # BUG FIX (annotation only): this returns a set of roam Links; the old
    # `List[Link]` annotation referenced an undefined name `Link`.
    def collect_backlinks(self) -> set:
        """Links elsewhere in the arcology pointing at this page's headings."""
        my_headings = self.file.heading_set.all()
        return set(roam.models.Link.objects.filter(dest_heading__in=my_headings))

    def to_html(self, links, heading=None, include_subheadings=False):
        """Render the page (or one heading) to HTML, memoized on file digest."""
        return self._to_html_memoized(hashabledict(links), heading, include_subheadings, self.file.digest)

    @cache(key_prefix="page_html", expire_secs=60*60*24*7)
    def _to_html_memoized(self, links, heading, include_subheadings, _file_digest):
        # _file_digest is unused in the body; it only busts the cache when
        # the underlying file changes.
        if heading is not None:
            headings = [heading]
        else:
            headings = []
        opts = native.ExportOptions(
            link_retargets=links,
            limit_headings=headings,
            include_subheadings=include_subheadings,
            ignore_tags=settings.IGNORED_ROAM_TAGS,
        )
        return native.htmlize_file(self.file.path, opts)

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> Page:
        """Create (or fetch) a Page from a parsed arroyo Document.

        NOTE(review): when ARCOLOGY_KEY is absent route_key is "" and
        Site.from_route("") raises Site.DoesNotExist — callers are expected
        to pass only documents carrying the keyword; confirm upstream.
        """
        f = roam.models.File.objects.get(path=doc.path)
        route_key = next(iter(doc.collect_keywords("ARCOLOGY_KEY")), "")
        # Any present keyword value counts as truthy; only absence is False.
        allow_crawl = (
            next(iter(doc.collect_keywords("ARCOLOGY_ALLOW_CRAWL")), False) is not False
        )
        site = Site.from_route(route_key)
        root_heading = f.heading_set.filter(level=0)[0]
        title = root_heading.title
        return cls.objects.get_or_create(
            file=f,
            route_key=route_key,
            allow_crawl=allow_crawl,
            site=site,
            root_heading=root_heading,
            title=title,
        )[0]
Base migration:
# Schema for Page: route_key primary key plus FKs into roam and Site.
migrations.CreateModel(
    name="Page",
    fields=[
        (
            "route_key",
            models.CharField(max_length=512, primary_key=True, serialize=False),
        ),
        ("title", models.CharField(max_length=512)),
        ("allow_crawl", models.BooleanField(default=False)),
        (
            "file",
            models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE, to="roam.file"
            ),
        ),
        (
            "root_heading",
            models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE, to="roam.heading"
            ),
        ),
        (
            "site",
            models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE, to="arcology.site"
            ),
        ),
    ],
),
Feed
Pages can define an Atom feed + Feediverse feeds by tagging a page with ARCOLOGY_FEED
keyword and making sure the headings have a PUBDATE
an ID
property. This feature relies on Pandoc right now, I'll need to write a custom Atom exporter in The arroyo_rs Native Org Parser when it comes time to implement these feeds.
These are also created using the create_from_arroyo">create_from_arroyo
pattern which makes it easy for the Arcology ingest_files Command to include new functionality in to the system.
class Feed(EMOM('feed'), models.Model):
    """An Atom feed attached to a Page via the ARCOLOGY_FEED keyword."""

    # Fediverse-style post visibilities; also duplicated on FeedEntry.
    POST_VISIBILITY = [
        ("unlisted", "Unlisted"),
        ("private", "Private"),
        ("public", "Public"),
        ("direct", "direct"),  # might be different, XXX
    ]
    file = models.ForeignKey(
        roam.models.File,
        on_delete=models.CASCADE,
    )
    route_key = models.CharField(max_length=512, primary_key=True)
    site = models.ForeignKey(
        Site,
        on_delete=models.CASCADE,
    )
    title = models.CharField(max_length=512)
    visibility = models.CharField(max_length=512, choices=POST_VISIBILITY)

    def url(self) -> str:
        # Absolute URL of the feed XML on the owning site.
        return self.site.urlize_feed(self)

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> Feed | None:
        """Create (or fetch) a Feed for doc; None when it has no ARCOLOGY_FEED."""
        route_key = next(iter(doc.collect_keywords("ARCOLOGY_FEED")), None)
        if not route_key:
            return None
        visibility = next(
            iter(doc.collect_keywords("ARCOLOGY_TOOT_VISIBILITY")), "private"
        )
        f = roam.models.File.objects.get(path=doc.path)
        site = Site.from_route(route_key)
        # Feed title comes from the file's level-0 heading.
        root_heading = f.heading_set.filter(level=0)[0]
        title = root_heading.title
        return cls.objects.get_or_create(
            file=f,
            route_key=route_key,
            title=title,
            visibility=visibility,
            site=site,
        )[0]

    @classmethod
    async def aget(cls, **kwargs):
        # Async fetch with file/site relations pre-fetched for async views.
        return await cls.objects.prefetch_related("file", "site").aget(
            **kwargs
        )
Base migration
# Schema for Feed: route_key primary key, visibility choices, FKs to
# roam.File and Site.
migrations.CreateModel(
    name="Feed",
    fields=[
        (
            "route_key",
            models.CharField(max_length=512, primary_key=True, serialize=False),
        ),
        ("title", models.CharField(max_length=512)),
        (
            "visibility",
            models.CharField(
                choices=[
                    ("unlisted", "Unlisted"),
                    ("private", "Private"),
                    ("public", "Public"),
                    ("direct", "direct"),
                ],
                max_length=512,
            ),
        ),
        (
            "file",
            models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE, to="roam.file"
            ),
        ),
        (
            "site",
            models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE, to="arcology.site"
            ),
        ),
    ],
),
FeedEntry
A FeedEntry is a Heading with a PUBDATE property that exists on a page w/ ARCOLOGY_FEED Keyword. These are used to construct Feeds
class FeedEntry(EMOM('feed_entry'), models.Model):
    """One Atom entry: a Heading with a PUBDATE property on an ARCOLOGY_FEED page."""

    # Fediverse-style post visibilities; duplicated from Feed.
    POST_VISIBILITY = [
        ("unlisted", "Unlisted"),
        ("private", "Private"),
        ("public", "Public"),
        ("direct", "direct"),  # might be different, XXX
    ]

    heading = models.ForeignKey(
        roam.models.Heading,
        on_delete=models.CASCADE,
    )
    feed = models.ForeignKey(
        Feed,
        on_delete=models.CASCADE,
    )
    route_key = models.CharField(max_length=512)
    site = models.ForeignKey(
        Site,
        on_delete=models.CASCADE,
    )
    title = models.CharField(max_length=512)
    visibility = models.CharField(max_length=512, choices=POST_VISIBILITY)
    pubdate = models.DateTimeField(auto_now=False)

    def to_html(self, links):
        """Render only this entry's heading (with subheadings) to HTML."""
        return self._to_html_memoized(hashabledict(links), self.heading.path.digest)

    @cache(key_prefix="feedentry_html", expire_secs=60*60*24*7)
    def _to_html_memoized(self, links, _file_digest):
        # _file_digest only busts the cache when the backing file changes.
        opts = native.ExportOptions(
            link_retargets=links,
            limit_headings=[self.heading.node_id],
            include_subheadings=True,
            ignore_tags=settings.IGNORED_ROAM_TAGS,
        )
        return native.htmlize_file(self.heading.path.path, opts)

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> List[FeedEntry] | None:
        """Create (or fetch) a FeedEntry for every PUBDATE heading in doc.

        Returns None when the document carries no ARCOLOGY_FEED keyword.
        BUG FIX (annotation only): this returns FeedEntry rows, not Feeds.
        """
        route_key = next(iter(doc.collect_keywords("ARCOLOGY_FEED")), None)
        if not route_key:
            return None
        visibility = next(
            iter(doc.collect_keywords("ARCOLOGY_TOOT_VISIBILITY")), "private"
        )
        site = Site.from_route(route_key)
        feed = Feed.objects.get(route_key=route_key)
        rets = []
        for nheading in doc.headings:
            # Only headings with a node id can become entries.
            if nheading.id is None:
                continue
            heading = roam.models.Heading.objects.get(node_id=nheading.id)
            pdqs = heading.headingproperty_set.filter(keyword="PUBDATE")
            if not pdqs.exists():
                continue
            v = pdqs.first().value
            # PUBDATE is an Org-style timestamp, e.g. "2024-02-04 Sun 23:48".
            pubdate = arrow.get(v, "YYYY-MM-DD ddd H:mm").format(arrow.FORMAT_RFC3339)
            rets += [cls.objects.get_or_create(
                heading=heading,
                feed=feed,
                route_key=route_key,
                title=heading.title,
                pubdate=pubdate,
                visibility=visibility,
                site=site,
            )[0]]
        return rets
Database Migrations
0001_base
These are assembled from the snippets described in the models above.
# Generated by Django 4.2.6 on 2023-12-18 02:46
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    # Squashed migration: replaces the first two arcology migrations.
    replaces = [("arcology", "0001_initial"), ("arcology", "0002_sitedomain_domain")]

    dependencies = [
        ("roam", "0005_alter_link_dest_heading"),
    ]

    # The noweb placeholders below are filled in by org-babel tangling.
    operations = [
        <<migration-site>>
        <<migration-page>>
        <<migration-feed>>
    ]
NEXT admin
don't worry too much about these; they are just used to validate that the data is ingested properly, to be honest.
from django.contrib import admin
import arcology.models
class DomainInline(admin.TabularInline):
    # Lets SiteDomain rows be edited inline on the Site admin page.
    model = arcology.models.SiteDomain
@admin.register(arcology.models.Site)
class SiteAdmin(admin.ModelAdmin):
    # Site admin with its domains editable inline.
    inlines = [DomainInline]
@admin.register(arcology.models.Page)
class PageAdmin(admin.ModelAdmin):
    # Default admin; exists only to eyeball ingested Page rows.
    pass
@admin.register(arcology.models.Feed)
class FeedAdmin(admin.ModelAdmin):
    # Default admin; exists only to eyeball ingested Feed rows.
    pass
@admin.register(arcology.models.FeedEntry)
class FeedEntryAdmin(admin.ModelAdmin):
    # Columns shown in the FeedEntry changelist.
    list_display = ["heading", "route_key", "pubdate", "title"]
The Web Server
These are the route urlpatterns:
from django.contrib import admin
from django.urls import path, re_path, include
from django.conf import settings
from arcology import views
urlpatterns = [
    path("admin/", admin.site.urls),
    path("", views.index),
    path("robots.txt", views.robots, name="robots_txt"),
    path("404", views.unpublished, name="page_not_found"),
    path("sites.css", views.site_css, name="site-css"),
    path("feeds.json", views.feed_list, name="feed-list"),
    path("", include("django_prometheus.urls")),
    path("", include("sitemap.urls")),
    # ensure these ones are last because they're greedy!
    # BUG FIX: regex patterns are now raw strings; "\." (and "\-") in a
    # plain string literal are invalid escape sequences and raise
    # SyntaxWarning on modern Python.
    re_path(r"(?P<key>[0-9a-zA-Z/_\-]+\.xml)", views.feed, name="feed"),
    re_path(r"(?P<key>[0-9a-zA-Z/_\-]+)", views.org_page, name="org-page"),
]

# Local development exposes the localapi endpoints ahead of the catch-alls.
if settings.ARCOLOGY_ENVIRONMENT != "production":
    urlpatterns = [
        path("api/v1/", include("localapi.urls")),
    ] + urlpatterns
This is the topmatter for the views described below:
import logging
from django.http import HttpResponse, HttpResponseNotFound, Http404
from django.shortcuts import render, get_object_or_404
from arcology.models import Page, Feed, Site
from roam.models import Link
from prometheus_client import Counter, Histogram
logger = logging.getLogger(__name__)
GET /
site index
this will just call the Org Page rendering function for the site's index page. render_page
is defined below.
def index(request):
    """Render the resolved site's index page (its "<key>/index" route)."""
    site = Site.from_request(request)
    return render_page(request, site, f"{site.key}/index")
Arcology Org Page handler
- State "INPROGRESS" from [2023-12-20 Wed 17:48]
This constructs a page key from the request, tries to load that page and its HTML, and renders that along with a bunch of other metadata stored in relation to the Page
object in the DB.
def org_page(request, key):
    """Render the Org page at `key` for the site implied by the Host header.

    On the localhost development domain the key carries its own site
    prefix, so the site is re-resolved from the key itself.
    """
    site = Site.from_request(request)
    if site.key == "localhost":
        full_key = key
        new_site_key = key.split("/")[0]
        site = Site.objects.filter(key=new_site_key).first()
        # BUG FIX: an unknown site prefix used to fall through with
        # site=None and crash render_page (AttributeError -> 500).
        if site is None:
            raise Http404(f"no site for key {new_site_key}")
    else:
        full_key = f"{site.key}/{key}"
    return render_page(request, site, full_key)
This render_page
function is shared between the index
request and the more complicated route handler.
It's manually instrumented with a few Prometheus Client counters and gauges to be emitted on top of what comes out of django-prometheus
already. This extra instrumentation is just enough to make a per-site and per-page hit chart, along with some very rudimentary User-Agent break-down to filter out most of the automated traffic.
# Per-page hit counter; labels give per-site/per-page/status/agent charts.
page_counter = Counter("arcology_page", "Hit counter for each page", ["site", "page", "status", "agent_type"])
# Latency histogram around the body of render_page().
render_latency = Histogram("arcology_page_render_seconds", "Latency for render_page func.", ["page", "site", "agent_type"])
from arcology.agent_utils import AgentClassification
from django.template import loader
def render_page(request, site, full_key):
    """Shared renderer behind index() and org_page().

    Looks up the Page for full_key, renders its HTML plus sidebar
    metadata, and records hit-count and latency metrics; unknown keys get
    the 404 template.
    """
    agent = AgentClassification.from_request(request)
    labels = dict(page=full_key, site=site.key, agent_type=agent)
    with render_latency.labels(**labels).time():
        try:
            page = Page.objects.get(route_key=full_key)
        except Page.DoesNotExist:
            page_counter.labels(status=404, **labels).inc()
            tmpl = loader.get_template("404.html")
            return HttpResponseNotFound(
                tmpl.render(dict(missing_key=full_key), request)
            )
        html = page.to_html(page.collect_links())
        page_counter.labels(status=200, **labels).inc()
        context = dict(
            site=site,
            page=page,
            feeds=site.feed_set.all(),
            head_title=f"{page.title} - {site.title}",
            html_content=html,
            backlinks=page.collect_backlinks(),
            keywords=page.collect_keywords().all(),
            references=page.collect_references(),
            tags=page.collect_tags(),
        )
        return render(request, "arcology/page.html", context)
arcology/page.html
extends app.html
to embed the Org page and its metadata
The page
template extends the app template defined below, which provides four blocks to inject content in to:
{% extends "arcology/app.html" %}
The tab title is assembled from the page and site title:
{% block title %}{{ head_title }}{% endblock %}
If the site has any feeds, they're injected in to the <head>
along with any particular web-crawler rules.
{% block extra_head %}
{% for feed in feeds %}
<link rel="alternate" type="application/atom+xml" href="{{ feed.url }}" title="{{ feed.title }}" />
{% endfor %}
{# BUG FIX: allow_crawl is a BooleanField, so the old test
   `allow_crawl is none or allow_crawl is '"nil"'` could never match and
   the noindex meta was never emitted; noindex whenever the flag is falsy. #}
{% if not page.allow_crawl %}
<meta name="robots" content="noarchive noimageindex noindex nofollow"/>
{% else %}
<meta name="robots" content=""/>
{% endif %}
{% endblock %}
The main content
block contains the <main>
generated by the native parser, and a sidebar containing backlinks, and page metadata, and other crap.
{% load cache %}
{% block content %}
{# HTML is sent through without HTML Escaping via | safe #}
{{ html_content | safe }}
{# Sidebar is cached for a week, keyed on the file digest so edits bust it. #}
{% cache 604800 sidebar page.file.digest %}
<section class="sidebar">
{% if backlinks|length > 0 %}
<div class="backlinks">
<h3>Pages Linking Here</h3>
<ul class="backlinks">
{% for backlink in backlinks %}
<li>{{ backlink.to_backlink_html|safe }}</li>
{% endfor %}
</ul>
</div>
{% endif %}
{% if tags|length > 0 %}
<div class="tags">
<h3>Page Tags</h3>
<ul class="tags">
{% for tag in tags %}
<li><a href="/tags/{{ tag.tag }}">{{tag.tag}}</a></li>
{% endfor %}
</ul>
</div>
{% endif %}
{% if references|length > 0 %}
<div class="references">
<h3>External References</h3>
<ul class="references">
{% for ref in references %}
<li><a target="_blank" href="{{ ref.ref }}">{{ref.ref}}</a></li>
{% endfor %}
</ul>
</div>
{% endif %}
{% if keywords|length > 0 %}
<div class="keywords">
<h3>Page Metadata Keywords</h3>
<ul class="keywords">
{% for keyword in keywords %}
<pre>#+{{ keyword.keyword }}: {{ keyword.value }}</pre>
{% endfor %}
</ul>
</div>
{% endif %}
</section>
{% endcache %}
{% endblock %}
Here's a really simple 404 template, too.
{% extends "arcology/app.html" %}
{# Friendly 404 / unpublished-page template; missing_key is supplied by
   render_page() and the unpublished() view. #}
{% block title %}Page Not Found{% endblock %}
{% block h1 %}<h1>Page Not Found</h1>{% endblock %}
{% block content %}
<section>
<p>
The page you tried to open either has not been written by the
author or the author has chosen to not publish it at this
time. Please contact the author and include the URL of both the
page you clicked the link on, as well as the link you'd like
to read. You may just want
to <a href="javascript:history.back()">Go Back</a>, too.
</p>
<p>
If you're interested in a particular reference, you might of
course have more luck using a public search engine
like <a href="https://duckduckgo.com">DuckDuckGo</a>
or <a href="https://kagi.com">Kagi</a>.
</p>
<pre>MISSING KEY = {{ missing_key }}</pre>
</section>
{% endblock %}
Org Page-specific CSS Stylings
Most of the page CSS is defined below as part of the app.html
, but the content-specific CSS is here, nearer the actual implementation of the flexbox above.
/* Two-column flex layout: centered, capped at 120ch; main text and the
   sidebar wrap when narrow. */
.content {
  margin-left: auto;
  margin-right: auto;
  padding: 1em;
  padding-top: 0;
  display: flex;
  flex-flow: row wrap;
  max-width: 120ch;
}
/* Main column and generic sections grow/shrink from a 40em basis. */
.content > section, main {
  display: inline-block;
  flex-grow: 1;
  flex-shrink: 1;
  flex-basis: 40em;
  padding: 1em;
  overflow: auto;
}
/* Sidebar keeps a fixed-ish 30ch column and never grows. */
.content > section.sidebar {
  flex-grow: 0;
  flex-shrink: 1;
  flex-basis: 30ch;
}
The sidebar itself is a vertical flexbox, pushing everything but the backlinks towards the bottom of the page.
/* Vertical flexbox: the growing backlinks div pushes everything else
   toward the bottom of the sidebar. */
section.sidebar {
  display: flex;
  flex-flow: column wrap;
}
section.sidebar > div.backlinks {
  flex-grow: 1;
}
Here are some hacks to put a line between the main content flexbox and the sidebar. I'm not sure I'll keep this, but it's nice to have a delimiter.
/* Dotted divider between main content and sidebar, faked with a ::before
   pseudo-element reordered between the flex children. */
.content::before {
  align-self: stretch;
  content: '';
  border: 1px dotted var(--medium-gray);
  margin-top: 1em;
  margin-bottom: 1em;
}
/* Pull the first real child ahead of the ::before divider. */
.content > *:first-child {
  order: -1;
}
And some simple image wrangling:
/* Center images and keep them from overflowing the column. */
.content img {
  display: block;
  width: 80%;
  margin: 0 auto;
}
These rules annotate task headings by inserting an icon before them.
/* Prefix Org task headings with a status icon per TODO keyword. */
.task.task-DONE::before {
  content: '☑️ ';
}
.task.task-NEXT::before {
  content: '🆕 ';
}
.task.task-INPROGRESS::before {
  content: '⏳ ';
}
This will display the header arguments to org-babel
source blocks: You're staring right at one!
/* Right-aligned strip above a source block showing its org-babel header args. */
span.babel-args {
  text-align: right;
  display: block;
  background: var(--light-gray);
  margin-bottom: 0;
}
/* Source block body: upright type on gray, scrollable when too wide. */
pre.src {
  border-top: 1px solid var(--black);
  background-color: var(--light-gray);
  font-style: normal;
  overflow: scroll;
  margin-top: 0;
  padding-top: 1em;
  padding-left: 0.5em;
  padding-bottom: 1em;
  padding-right: 0.5em;
}
Atom Feed Handler
- State "INPROGRESS" from "NEXT" [2024-02-04 Sun 23:48]
This uses the sub-feature of the HTML exporter to export only certain sub-headings in The arroyo_rs Native Org Parser. The FeedEntry
's defined above are used to construct the feed. I do some gnarly stuff including just stuffing a custom Django template filter in to there so that I can keep a bunch of node ID
-> $thing
maps so that when I make the feed entries I can just reach in to a few dicts instead of shaping that all on the handler. But 仕方がない…
import arrow
import roam.models
def feed(request, key):
    """Render the Atom document for the Feed at `key` (10 newest entries)."""
    # Get the site and construct the route key
    site = Site.from_request(request)
    if site.key == "localhost":
        full_key = key
        new_site_key = key.split("/")[0]
        site = Site.objects.filter(key=new_site_key).first()
        # BUG FIX: unknown site prefix used to fall through with site=None.
        if site is None:
            raise Http404(f"no site for key {new_site_key}")
    else:
        full_key = f"{site.key}/{key}"
    # Fetch page metadata
    the_feed = get_object_or_404(Feed, route_key=full_key)
    entries = the_feed.feedentry_set.order_by("-pubdate").all()[:10]
    if len(entries) == 0:
        # BUG FIX: Http404 is an exception and was previously *returned*,
        # which makes Django fail with "view didn't return an HttpResponse".
        raise Http404("feed has no entries")
    try:
        page_author = roam.models.Keyword.objects.get(keyword="AUTHOR", path=the_feed.file).value
    except roam.models.Keyword.DoesNotExist:
        # logger.warn() is a deprecated alias of logger.warning().
        logger.warning(f"Feed {key} does not have an AUTHOR!")
        page_author = "Arcology User"
    page_url = the_feed.file.page_set.first().to_url()
    updated_at = arrow.get(entries[0].pubdate).format(arrow.FORMAT_RFC3339)  # entries is already sorted
    # node-id -> URL
    links = the_feed.file.page_set.first().collect_links()
    # node-id -> HTML
    html_map = {
        entry.heading.node_id: entry.to_html(links=links) for entry in entries
    }
    # node-id -> PUBDATE heading property
    pubdate_map = {
        entry.heading.node_id: arrow.get(entry.pubdate).format(arrow.FORMAT_RFC3339) for entry in entries
    }
    return render(request, "arcology/feed.xml", dict(
        title=the_feed.title,
        page_url=page_url,
        author=page_author,
        updated_at=updated_at,
        feed_entries=entries,
        htmls=html_map,
        pubdates=pubdate_map,
        links=links,
    ), content_type="application/atom+xml")
An Atom feed is pretty simple, it's an XML document with multiple <entry>
's and the metadata we collected above. For once i'm glad that Python templating treats strings as HTML-Unsafe and escapes the generated HTML used in the Summary for me. This bit me in the past, with the FastAPI version – the stuff that goes inside of type = "html"
elements isn't necessarily valid XML so it needs to get escaped.
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
{# Context comes from the feed() view; htmls/pubdates/links are node-id
   keyed dicts read via the get_item filter. Entry HTML is deliberately
   escaped by the template engine: type="html" payloads are not XML. #}
<title>{{ title }}</title>
<link href="{{ page_url }}"/>
<updated>{{ updated_at }}</updated>
<author>
<name>{{ author }}</name>
</author>
<id>{{ page_url }}</id>
{% for entry in feed_entries %}
<entry>
<title>{{ entry.title }}</title>
<link href="{{ links | get_item:entry.heading.node_id }}"/>
<id>urn:uid:{{ entry.heading.node_id }}</id>
<updated>{{ pubdates | get_item:entry.heading.node_id }}</updated>
<summary type="html">{{ htmls | get_item:entry.heading.node_id }}</summary>
</entry>
{% endfor %}
</feed>
NEXT add category/tags to the entries
NEXT move this function to somewhere else more reasonable
This template relies on this custom Django template i nicked from StackOverflow to access a dict with a variable key.
from django.template.defaulttags import register
@register.filter
def get_item(dictionary, key):
    """Template filter: index a dict with a variable key (None if absent)."""
    return dictionary.get(key)
CANCELLED [#A] see if the IDs are consistent with the old generator
- State "CANCELLED" from "NEXT" [2024-02-26 Mon 17:46]
404 unpublished/not found endpoint
There are plenty of links inside the Arcology which aren't meant to be clicked. roam:
stub links will of course
def unpublished(request):
    """Render the 404 page for stubbed roam: links, echoing the missing key."""
    missing = request.GET.get("key", "NOT_SUPPLIED")
    # query links etc to create a JSON doc for SigmaJS
    template = loader.get_template("404.html")
    return HttpResponseNotFound(
        template.render(dict(missing_key=missing), request)
    )
GET /robots.txt
Endpoint
robots.txt is the roam:Robots Exclusion Protocol, a standard used by websites to indicate to visiting web crawlers and other web robots which portions of the website they are allowed to visit.
- Disallow all GPT-alikes on all pages, I will add more to this list as necessary. Probably will pull these in to Arcology Project Configuration sooner or later.
- Show all pages with a truthy
ARCOLOGY_ALLOW_CRAWL
roam.models.Keyword">roam.models.Keyword
- If we're on local development, it will show all pages, otherwise only ones for the site being queried.
def robots(request):
    """Serve robots.txt: block LLM crawlers everywhere, allow crawlable pages.

    On localhost every site's crawlable pages are listed; on a real domain
    only the current site's.
    """
    site = Site.from_request(request)
    crawlable = Page.objects.filter(allow_crawl=True)
    if site.key != "localhost":
        crawlable = crawlable.filter(site=site)
    context = dict(
        disallow_all_agents=["GPTBot", "ChatGPT-User", "Google-Extended", "CCBot", "anthropic-ai"],
        pages=crawlable.all(),
    )
    return render(request, "arcology/robots.txt", context, content_type="text/plain")
Those values are passed to the Jinja template:
{# Block each scraper agent wholesale, deny everything else by default,
   then explicitly Allow each crawlable page path (loop tag kept inline so
   Allow lines are not separated by blank lines). #}
{% for agent in disallow_all_agents %}
User-agent: {{ agent }}
Disallow: /
{% endfor %}
User-agent: *
Disallow: /
{% for page in pages %}Allow: {{ page.to_url_path }}
{% endfor %}
GET /feeds.json
Feed discovery endpoint
CLOCK: [2024-02-15 Thu 14:17]–[2024-02-15 Thu 14:41] => 0:24
import json
def feed_list(request):
    """JSON discovery document enumerating every feed across all sites."""
    # Resolving the site validates the Host header maps to a known site.
    site = Site.from_request(request)
    payload = []
    for feed in Feed.objects.all():
        payload.append(dict(
            key=feed.route_key,
            url=feed.site.urlize_feed(feed),
            title=feed.title,
            site=feed.site.key,
            visibility=feed.visibility,
        ))
    return HttpResponse(json.dumps(payload), content_type="application/json")
GET /sites.css
Per-Site link color dynamic CSS endpoint
This endpoint generates a dynamic CSS file that colorizes internal URLs based on the The Arcology's Site List which is stored in the database. It does something extremely wicked to make the page links less jarring until you hover over them by faking an alpha-channel in to the color.
def site_css(request):
    """Generate per-site CSS that colorizes internal links by link_color.

    The two trailing hex digits fake an alpha channel: 66 at rest, FF on
    hover (assumes link_color is a "#RRGGBB" string — TODO confirm).
    """
    sites = Site.objects.all()
    stanzas = []
    for site in sites:
        for domain in site.sitedomain_set.all():
            stanzas.append(f'''
a[href*="//{domain.domain}"] {{
  border-radius: 0.25em;
  padding: 0.1em;
  background-color: {site.link_color}66;
}}
a[href*="//{domain.domain}"]:hover {{
  background-color: {site.link_color}FF !important;
}}
''')
    # Site-independent stanza flagging dead/unpublished links; emitted once
    # after the per-site loop (it was a placeholder-free f-string before).
    stanzas.append('''
a[href*="/404"] {
  color: var(--alert);
  /* text-decoration: line-through; */
}
a[href*="/404"]::after {
  content: " ⚠";
}
a[href*="/404"]::before {
  content: "⚠ ";
}
''')
    # Join explicitly: HttpResponse also accepts an iterable, but a single
    # string is unambiguous.
    return HttpResponse("".join(stanzas), content_type="text/css")
app.html
Arcology Site Templates
In short, there are four blocks that the page template and other templates will use to embed content in the rendered web page:
title
is the<title>
element, the name of the tab.h1
is the displayed site/page title and only needs to be extended if some page wants to do something strange (like site index pages only showing the site title)extra_head
is inside<head>
and can be used to stuff more metadata in therecontent
is where the content goes.
for now it's largely lifted from Base HTML Template and Page HTML Templates from the FastAPI prototype with some nips and tucks to make it more streamlined and legible.
<!DOCTYPE html>
<html>
<head>
The base template provides some basic information and loads the CSS sheets necessary to make things look nice, along with some page and author metadata. It provides a template block extra_head
so that child templates can shove more <head>
elements in here.
{% load static %}
{% load django_htmx %}
{# Base stylesheets, then the dynamic per-site sheet, then an optional
   site-specific override from Site.css_file. #}
<link rel="stylesheet" href="{% static 'arcology/css/app.css' %}"/>
<link rel="stylesheet" href="{% static 'arcology/css/vulf.css' %}"/>
<link rel="stylesheet" href="{% static 'arcology/css/default-colors.css' %}"/>
<link rel="stylesheet" href="{% url 'site-css' %}"/>
{% if site and site.css_file %}
<link rel="stylesheet" href="{% static site.css_file %}"/>
{% endif %}
<meta name="author" content="Ryan Rix"/>
<meta name="generator" content="Arcology Site Engine https://engine.arcology.garden/"/>
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{% block title %}{{head_title | default:"The Arcology Project" }}{% endblock %}</title>
{% block extra_head %}{% endblock %}
</head>
The body consists of a header which has the site and page title (which can be overridden for example in the index
handler to only show the site title) and links to the other sites. These should be loaded from the DB eventually.
<body>
<header>
<div class="header-content">
{# h1 block is overridable, e.g. index pages showing only the site title. #}
{% block h1 %}
<h1><a href='/'>{{ site.title }}</a></h1>
<h2>{{ page.title }}</h2>
{% endblock %}
{# Hard-coded cross-site navigation; should be loaded from the DB eventually. #}
<div>
• <a class="internal" href="https://thelionsrear.com">Life</a>
• <a class="internal" href="https://arcology.garden">Tech</a>
• <a class="internal" href="https://cce.whatthefuck.computer">Emacs</a>
• <a class="internal" href="https://engine.arcology.garden">Arcology</a>
•
</div>
</div>
</header>
The content
block is used in child templates to hide a <main>
; the content
div should be a main element instead but The arroyo_rs Native Org Parser wants to output a <main>
and i'm not going to stop it, so the div is there to make the body's flexbox layout work.
<div class="content">
{% block content %}{% endblock %}
</div>
A footer contains the oh-so-important copyright notice and a limited privacy policy which I should update before I ship this, along with links to the sitemap and to my fediring neighbors.
{# Footer: copyright, privacy note, sitemap/tag links, fediring neighbors. #}
<footer>
<hr/>
© 02024 <a href="https://arcology.garden/people/rrix">Ryan Rix</a> <<a href="mailto:site@whatthefuck.computer">site@whatthefuck.computer</a>>
<br/>
<p>
Care has been taken to publish accurate information to
long-lived URLs, but context and content as well as URLs may
change without notice.
</p>
<p>
This site collects no personal information from visitors, nor
stores any identifying tokens. If you or your personal
information ended up in public notes please email me for
correction or removal. A single bit cookie may be stored on
your device if you choose to change appearance settings below.
</p>
<p>
Email me with questions, comments, insights, kind criticism.
blow horn, good luck.
</p>
<p>
View the <a href="/sitemap">Site Map</a> or the <a href="/tags">Tag Index</a>.
</p>
<p>
<a href="https://fediring.net/previous?host=arcology.garden">←</a>
<a href="https://fediring.net/">Fediring</a>
<a href="https://fediring.net/next?host=arcology.garden">→</a>
</p>
The FastAPI site had a "boredom mode" which would disable fonts and colors because some nerds were mean to me. This one will not have that until some nerds are mean to me.
{# Disabled "boredom mode" toggle; <<boredom>> is an org-babel placeholder. #}
<!--
<p>
<input type="checkbox" id="boredom-mode"><label for="boredom-mode">I do not like your aesthetic sensibilities!!</label>
</p>
<script type="text/javascript">
<<boredom>>
</script>
-->
</footer>
</body>
</html>
CSS
this will be extended.
rather than using emoji for each site, it would be nice to subtly color them based on the link_color… will need to Do Some Bullshit to make that work though maybe.
/* Base typography: italic Vulf Mono on CSS-variable colors. */
body {
  font-family: "Vulf Mono", monospace;
  font-style: italic;
  font-size: medium;
  background-color: var(--white);
  color: var(--black);
  margin: 0;
}
header {
  background-color: var(--light-gray);
  border-radius: 0.25em;
  margin-top: 0;
  border-bottom: 2px solid var(--dark-gray);
}
/* Header inner column matches the 120ch content width. */
header > .header-content {
  padding: 1em;
  max-width: 120ch;
  margin-left: auto;
  margin-right: auto;
}
/* Site and page title sit inline, joined by an em-dash. */
header h1, header h2 {
  margin-top: 0;
  display: inline;
}
header h2:before {
  content: " — ";
}
footer {
  margin-left: auto;
  margin-right: auto;
  max-width: 120ch;
  font-size: smaller;
  text-align: center;
}
footer a {
  font-weight: 500;
}
a {
  color: var(--primary);
}
a::visited {
  color: var(--secondary);
}
/* Inline code is upright to contrast with the italic body text. */
code {
  font-style: normal;
}
There are per-site CSS in The Arcology's Site List.
Generating @font-face
rules for a bunch of fonts
Vulfpeck Fonts are pulled in with this code-gen because writing @font-face
rules does not bring joy and I don't have the right to redistribute these files, so I won't check it in at all.
VulfSans | Regular | 500 | |
VulfMono | Regular | 500 | |
VulfSans | Bold | 800 | |
VulfMono | Bold | 800 | |
VulfSans | Italic | 500 | italic |
VulfMono | Italic | 500 | italic |
VulfSans | Bold_Italic | 800 | italic |
VulfMono | Bold_Italic | 800 | italic |
VulfSans | Light | 300 | |
VulfMono | Light | 300 | |
VulfSans | Light_Italic | 500 | italic |
VulfMono | Light_Italic | 500 | italic |
;; Code-gen: for each row of the org table `tbl'
;; ((FAMILY VARIANT WEIGHT STYLE) ...) emit an @font-face rule and write
;; the result to the static vulf.css file; fonts are not checked in.
(with-temp-buffer
  (-map (pcase-lambda (`(,first ,second ,weight ,style))
          (insert
           (s-join "\n" (list
                         "@font-face {"
                         "font-family: " (if (equal first "VulfMono")
                                             "\"Vulf Mono\""
                                           "\"Vulf Sans\"")
                         "; src:"
                         (concat "url('/static/arcology/fonts/" first "-" second ".woff') format('woff'),")
                         (concat "url('/static/arcology/fonts/" first "-" second ".woff2') format('woff2'),")
                         (concat "url('/static/arcology/fonts/" first "-" second ".ttf') format('truetype');")
                         "font-weight: " (number-to-string weight) ";"
                         ;; nil for non-italic rows; NOTE(review): s-join on a
                         ;; list containing nil will error -- confirm rows
                         ;; always carry a style cell.
                         (unless (equal style "")
                           (concat "font-style: " style ";"))
                         "}"))))
        tbl)
  (write-file "~/org/arcology-django/arcology/static/arcology/css/vulf.css"))
NEXT this is a lever for restructuring the arcology
app.html
template would be provided by a configuration-module repo that a user should set up on a template that depends on arroyo, arcology, roam modules. It would be the one responsible for setting up gunicorn
etc, and also provide the command line wrapper
NEXT Testing
- site from_request and from_key need to be tested
- site urlize page function needs to be tested too
- page collect functions at least need type annotations…
to_html
instance method needs to be tested (and the memoization too)create_from_arroyo
too-
feed
andfeedentry
- both the
create_from_arroyo
,to_html
- both the
- the feed generator stuff in the view probably should go in to a model class, but test it.
-
page handler view logic, test that 404s work, check that localhost loads work
- check optional sidebar stuff in the view logic
- sitemap when i write it
- per-site link color css endpoint