arcology/arcology.org

The Arcology's Data Models and Web Server

Data Models for Sites, Web Features, and Feeds

from __future__ import annotations
from typing import Optional, List
from django.db import models
from django.conf import settings
from django_prometheus.models import ExportModelOperationsMixin as EMOM

import arrow

import arroyo.arroyo_rs as native
from arcology.cache_decorator import cache

import roam.models

import logging

logger = logging.getLogger(__name__)
logger.setLevel(logging.WARN)

# used for some memoization
class hashabledict(dict):
    def __hash__(self):
        return hash(tuple(sorted(self.items())))
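
This class exists because the HTML memoization below keys its cache on the call arguments, and a plain dict can't be part of a cache key. A minimal sketch of the failure mode it avoids, using functools.lru_cache as a stand-in for the project's cache decorator (an assumption about how that decorator builds its keys):

from functools import lru_cache

class hashabledict(dict):  # same class as above, repeated so this sketch runs standalone
    def __hash__(self):
        return hash(tuple(sorted(self.items())))

@lru_cache(maxsize=None)
def count_links(links):
    # links has to be hashable for the cache to key on it
    return len(links)

count_links(hashabledict({"node-id": "https://example.com/page"}))  # fine
# count_links({"node-id": "https://example.com/page"})  # TypeError: unhashable type: 'dict'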

Site

A Site has many SiteDomains. It has a routing key, a title, and some CSS and customization. There are a few helper classmethods which take an incoming request or a routing key and return a Site object by way of its SiteDomain. I'm not sure I want the async definitions to stick around; there needs to be some consideration of what should be async in this system and where ASGI can be relied on for concurrency.

Sites are created in the Arcology Seed Command.

# Sites and SiteDomains are created in django-admin or a seed rather than from arroyo parser, no create_from_arroyo..!
class Site(EMOM('site'), models.Model):
    key = models.CharField(max_length=512, primary_key=True)
    title = models.CharField(max_length=512)

    # add choices
    css_file = models.CharField(max_length=512, blank=True, default=None)
    # this is used in sitemap, and maybe links..
    link_color = models.CharField(max_length=8, blank=True, default=None)

    def urlize_page(self, page: Page, heading: Optional[roam.models.Heading] = None):
        domain = self.sitedomain_set.first().domain
        key_rest = page.route_key.split("/", 1)[1]
        url = f"https://{domain}/{key_rest}"
        if heading is not None:
            url = url + f"#{heading.node_id}"
        return url


    def urlize_feed(self, feed: Feed):
        domain = self.sitedomain_set.first().domain
        key_rest = feed.route_key.split("/", 1)[1]
        url = f"https://{domain}/{key_rest}"
        return url


    @classmethod
    def from_route(cls: Site, route_key: str) -> Site:
        site_key = route_key.split("/")[0]
        site = cls.objects.get(key=site_key)
        assert site is not None
        return site

    @classmethod
    def from_request(cls: Site, request) -> Site:
        host = request.headers.get("Host")
        site = cls.objects.filter(sitedomain__domain=host).first()
        assert site is not None
        return site


class SiteDomain(EMOM('site_domain'), models.Model):
    site = models.ForeignKey(
        Site,
        on_delete=models.CASCADE,
    )
    domain = models.CharField(max_length=512)
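
Taken together, these helpers resolve an incoming request to a Site and turn model objects into absolute URLs. A minimal sketch of how they compose; the route key and URL shown here are illustrative, not real seed data:

from arcology.models import Page, Site

def canonical_page_url(request, route_key: str) -> str:
    # Host header -> Site, via the SiteDomain rows attached to it
    site = Site.from_request(request)
    # Pages are keyed by their full route key, e.g. "garden/index"
    page = Page.objects.get(route_key=route_key)
    # urlize_page drops the site prefix and prepends the site's first domain,
    # yielding something like "https://arcology.garden/index"
    return site.urlize_page(page)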

Base migration

migrations.CreateModel(
    name="Site",
    fields=[
        (
            "key",
            models.CharField(max_length=512, primary_key=True, serialize=False),
        ),
        ("title", models.CharField(max_length=512)),
        (
            "css_file",
            models.CharField(blank=True, default=None, max_length=512),
        ),
        (
            "link_color",
            models.CharField(blank=True, default=None, max_length=8),
        ),
    ],
),
migrations.CreateModel(
    name="SiteDomain",
    fields=[
        (
            "id",
            models.BigAutoField(
                auto_created=True,
                primary_key=True,
                serialize=False,
                verbose_name="ID",
            ),
        ),
        (
            "site",
            models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE, to="arcology.site"
            ),
        ),
        ("domain", models.CharField(default="localhost", max_length=512)),
    ],
),

Page

A Site has many Pages. Pages have a routing key defined by the ARCOLOGY_KEY keyword, a title taken from the level-0 heading, and some other metadata besides.

These are created using the create_from_arroyo pattern, which makes it easy for the Arcology ingest_files Command to pull new functionality into the system.

class Page(EMOM('page'), models.Model):
    file = models.ForeignKey(
        roam.models.File,
        on_delete=models.CASCADE,
    )
    route_key = models.CharField(max_length=512, primary_key=True)
    root_heading = models.ForeignKey(roam.models.Heading, on_delete=models.CASCADE)
    site = models.ForeignKey(
        Site,
        on_delete=models.CASCADE,
    )
    title = models.CharField(max_length=512)
    allow_crawl = models.BooleanField(default=False)

    def to_url(self):
        site = self.site
        return site.urlize_page(self)

    def to_url_path(self):
        key_rest = self.route_key.split("/", 1)[1]
        return f"/{key_rest}"


    def collect_keywords(self):
        return self.file.keyword_set

    def collect_tags(self):
        return [
            tag
            for heading in self.file.heading_set.all()
            for tag in heading.tag_set.all()
        ]

    def collect_references(self):
        return [
            reference
            for heading in self.file.heading_set.all()
            for reference in heading.reference_set.all()
        ]

    def collect_links(self):
        my_headings = self.file.heading_set.all()
        link_objs = self.file.outbound_links.all()
        ret = {
          h.node_id: h.to_url() for h in my_headings
        }
        for el in link_objs:
            try:
                h = el.dest_heading
                url = h.to_url()
                ret[h.node_id] = url
                logger.info(f"link {url} from {el}")
            except roam.models.Heading.DoesNotExist:
                logger.info(f"{el} does not have dest")

        return ret


    def collect_backlinks(self) -> List[Link]:
        my_headings = self.file.heading_set.all()
        return set(roam.models.Link.objects.filter(dest_heading__in=my_headings))

    def to_html(self, links, heading=None, include_subheadings=False):
        return self._to_html_memoized(hashabledict(links), heading, include_subheadings, self.file.digest)

    @cache(key_prefix="page_html", expire_secs=60*60*24*7)
    def _to_html_memoized(self, links, heading, include_subheadings, _file_digest):
        if heading is not None:
            headings = [heading]
        else:
            headings = []
        opts = native.ExportOptions(
            link_retargets=links,
            limit_headings=headings,
            include_subheadings=include_subheadings,
            ignore_tags=settings.IGNORED_ROAM_TAGS,
        )
        return native.htmlize_file(self.file.path, opts)


    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> Page:
        f = roam.models.File.objects.get(path=doc.path)
        route_key = next(iter(doc.collect_keywords("ARCOLOGY_KEY")), "")
        allow_crawl = (
            next(iter(doc.collect_keywords("ARCOLOGY_ALLOW_CRAWL")), False) is not False
        )
        site = Site.from_route(route_key)
        root_heading = f.heading_set.filter(level=0)[0]
        title = root_heading.title
        return cls.objects.get_or_create(
            file=f,
            route_key=route_key,
            allow_crawl=allow_crawl,
            site=site,
            root_heading=root_heading,
            title=title,
        )[0]
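
Pages enter the system through this classmethod: the ingest side hands each parsed Document to create_from_arroyo and the model does the keyword plumbing itself. A hedged sketch of that call site; the real ingest_files Command lives elsewhere, so the function here is illustrative:

import arroyo.arroyo_rs as native
from arcology.models import Page

def ingest_document(doc: native.Document) -> Page:
    # ARCOLOGY_KEY, ARCOLOGY_ALLOW_CRAWL, the root heading, and the owning Site
    # are all derived inside create_from_arroyo; the feed models below follow
    # the same pattern.
    return Page.create_from_arroyo(doc)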

Base migration:

migrations.CreateModel(
    name="Page",
    fields=[
        (
            "route_key",
            models.CharField(max_length=512, primary_key=True, serialize=False),
        ),
        ("title", models.CharField(max_length=512)),
        ("allow_crawl", models.BooleanField(default=False)),
        (
            "file",
            models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE, to="roam.file"
            ),
        ),
        (
            "root_heading",
            models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE, to="roam.heading"
            ),
        ),
        (
            "site",
            models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE, to="arcology.site"
            ),
        ),
    ],
),

Feed

Pages can define an Atom feed and Fediverse feeds by tagging a page with the ARCOLOGY_FEED keyword and making sure the headings have a PUBDATE and an ID property. This feature relies on Pandoc right now; I'll need to write a custom Atom exporter in The arroyo_rs Native Org Parser when it comes time to implement these feeds.

These are also created using the create_from_arroyo pattern, which makes it easy for the Arcology ingest_files Command to pull new functionality into the system.

class Feed(EMOM('feed'), models.Model):
    POST_VISIBILITY = [
        ("unlisted", "Unlisted"),
        ("private", "Private"),
        ("public", "Public"),
        ("direct", "direct"),  # might be different, XXX
    ]

    file = models.ForeignKey(
        roam.models.File,
        on_delete=models.CASCADE,
    )
    route_key = models.CharField(max_length=512, primary_key=True)
    site = models.ForeignKey(
        Site,
        on_delete=models.CASCADE,
    )
    title = models.CharField(max_length=512)
    visibility = models.CharField(max_length=512, choices=POST_VISIBILITY)

    def url(self):
        return self.site.urlize_feed(self)

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> Feed | None:
        route_key = next(iter(doc.collect_keywords("ARCOLOGY_FEED")), None)
        if not route_key:
            return None
        visibility = next(
            iter(doc.collect_keywords("ARCOLOGY_TOOT_VISIBILITY")), "private"
        )
        f = roam.models.File.objects.get(path=doc.path)
        site = Site.from_route(route_key)
        root_heading = f.heading_set.filter(level=0)[0]
        title = root_heading.title

        return cls.objects.get_or_create(
            file=f,
            route_key=route_key,
            title=title,
            visibility=visibility,
            site=site,
        )[0]

    @classmethod
    async def aget(cls, **kwargs):
        return await cls.objects.prefetch_related("file", "site").aget(
            **kwargs
        )
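
The async helper is there so ASGI views can fetch a Feed without blocking; a sketch of how such a view might use it (no view like this exists in this file, so the names are illustrative):

from django.http import JsonResponse

from arcology.models import Feed

async def feed_meta(request, key):
    # aget prefetches file and site, so the attribute access below doesn't
    # trigger a second, synchronous query from async context
    feed = await Feed.aget(route_key=key)
    return JsonResponse({
        "title": feed.title,
        "visibility": feed.visibility,
        "site": feed.site.key,
    })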

Base migration

migrations.CreateModel(
    name="Feed",
    fields=[
        (
            "route_key",
            models.CharField(max_length=512, primary_key=True, serialize=False),
        ),
        ("title", models.CharField(max_length=512)),
        (
            "visibility",
            models.CharField(
                choices=[
                    ("unlisted", "Unlisted"),
                    ("private", "Private"),
                    ("public", "Public"),
                    ("direct", "direct"),
                ],
                max_length=512,
            ),
        ),
        (
            "file",
            models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE, to="roam.file"
            ),
        ),
        (
            "site",
            models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE, to="arcology.site"
            ),
        ),
    ],
),

FeedEntry

A FeedEntry is a Heading with a PUBDATE property that exists on a page with the ARCOLOGY_FEED keyword. These are used to construct Feeds.

class FeedEntry(EMOM('feed_entry'), models.Model):
    POST_VISIBILITY = [
        ("unlisted", "Unlisted"),
        ("private", "Private"),
        ("public", "Public"),
        ("direct", "direct"),  # might be different, XXX
    ]

    heading = models.ForeignKey(
        roam.models.Heading,
        on_delete=models.CASCADE,
    )
    feed = models.ForeignKey(
        Feed,
        on_delete=models.CASCADE,
    )
    route_key = models.CharField(max_length=512)
    site = models.ForeignKey(
        Site,
        on_delete=models.CASCADE,
    )
    title = models.CharField(max_length=512)
    visibility = models.CharField(max_length=512, choices=POST_VISIBILITY)
    pubdate = models.DateTimeField(auto_now=False)

    def to_html(self, links):
        return self._to_html_memoized(hashabledict(links), self.heading.path.digest)

    @cache(key_prefix="feedentry_html", expire_secs=60*60*24*7)
    def _to_html_memoized(self, links, _file_digest):
        opts = native.ExportOptions(
            link_retargets=links,
            limit_headings=[self.heading.node_id],
            include_subheadings=True,
            ignore_tags=settings.IGNORED_ROAM_TAGS,
        )
        return native.htmlize_file(self.heading.path.path, opts)

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> List[FeedEntry] | None:
        route_key = next(iter(doc.collect_keywords("ARCOLOGY_FEED")), None)
        if not route_key:
            return None
        visibility = next(
            iter(doc.collect_keywords("ARCOLOGY_TOOT_VISIBILITY")), "private"
        )
        site = Site.from_route(route_key)
        # f = roam.models.File.objects.get(path=doc.path)
        feed = Feed.objects.get(route_key=route_key)

        rets = []
        for nheading in doc.headings:
            if nheading.id is not None:
                heading = roam.models.Heading.objects.get(node_id=nheading.id)
                pdqs = heading.headingproperty_set.filter(keyword="PUBDATE")
                if not pdqs.exists():
                    continue
                v = pdqs.first().value
                pubdate = arrow.get(v, "YYYY-MM-DD ddd H:mm").format(arrow.FORMAT_RFC3339)
                title = heading.title
                rets += [cls.objects.get_or_create(
                    heading=heading,
                    feed=feed,
                    route_key=route_key,
                    title=title,
                    pubdate=pubdate,
                    visibility=visibility,
                    site=site,
                )[0]]
        # root_heading = f.heading_set.filter(level=0)[0]
        # title = root_heading.title

        return rets
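
The PUBDATE parsing above leans on arrow's token parser; a quick illustration of the round trip, assuming the property holds an org-style timestamp of this shape:

import arrow

value = "2024-02-04 Sun 23:48"  # illustrative PUBDATE property value
parsed = arrow.get(value, "YYYY-MM-DD ddd H:mm")
print(parsed.format(arrow.FORMAT_RFC3339))  # 2024-02-04T23:48:00+00:00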

Database Migrations

0001_base

These are assembled from the snippets described in the models above.

# Generated by Django 4.2.6 on 2023-12-18 02:46

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    replaces = [("arcology", "0001_initial"), ("arcology", "0002_sitedomain_domain")]

    dependencies = [
        ("roam", "0005_alter_link_dest_heading"),
    ]

    operations = [
        <<migration-site>>
        <<migration-page>>
        <<migration-feed>>
    ]

NEXT admin

Don't worry too much about these; they're just used to validate that the data is ingested properly, to be honest.

from django.contrib import admin
import arcology.models


class DomainInline(admin.TabularInline):
    model = arcology.models.SiteDomain


@admin.register(arcology.models.Site)
class SiteAdmin(admin.ModelAdmin):
    inlines = [DomainInline]


@admin.register(arcology.models.Page)
class PageAdmin(admin.ModelAdmin):
    pass


@admin.register(arcology.models.Feed)
class FeedAdmin(admin.ModelAdmin):
    pass

@admin.register(arcology.models.FeedEntry)
class FeedEntryAdmin(admin.ModelAdmin):
    list_display = ["heading", "route_key", "pubdate", "title"]

The Web Server

These are the route urlpatterns:

from django.contrib import admin
from django.urls import path, re_path, include
from django.conf import settings

from arcology import views

urlpatterns = [
    path("admin/", admin.site.urls),
    path("", views.index),
    path("robots.txt", views.robots, name="robots_txt"),
    path("404", views.unpublished, name="page_not_found"),
    path("sites.css", views.site_css, name="site-css"),
    path("feeds.json", views.feed_list, name="feed-list"),
    path("", include("django_prometheus.urls")),
    path("", include("sitemap.urls")),
    # ensure these ones are last because they're greedy!
    re_path("(?P<key>[0-9a-zA-Z/_\-]+\.xml)", views.feed, name="feed"),
    re_path("(?P<key>[0-9a-zA-Z/_\-]+)", views.org_page, name="org-page"),
]
if settings.ARCOLOGY_ENVIRONMENT != "production":
    urlpatterns = [
            path("api/v1/", include("localapi.urls")),
    ] + urlpatterns

This is the topmatter for the views described below:

import logging
from django.http import HttpResponse, HttpResponseNotFound, Http404
from django.shortcuts import render, get_object_or_404

from arcology.models import Page, Feed, Site
from roam.models import Link

from prometheus_client import Counter, Histogram

logger = logging.getLogger(__name__)

GET / site index

This will just call the Org Page rendering function for the site's index page; render_page is defined below.

def index(request):
    site = Site.from_request(request)
    full_key = f"{site.key}/index"
    return render_page(request, site, full_key)

Arcology Org Page handler

  • State "INPROGRESS" from [2023-12-20 Wed 17:48]

This constructs a page key from the request, tries to load that page and its HTML, and renders that along with a bunch of other metadata stored in relation to the Page object in the DB.

def org_page(request, key):
    site = Site.from_request(request)
    if site.key == "localhost":
        full_key = key
        new_site_key = key.split("/")[0]
        site = Site.objects.filter(key=new_site_key).first()
    else:
        full_key = f"{site.key}/{key}"

    return render_page(request, site, full_key)

This render_page function is shared between the index request and the more complicated route handler.

It's manually instrumented with a few Prometheus client counters and histograms emitted on top of what comes out of django-prometheus already. This extra instrumentation is just enough to make a per-site and per-page hit chart, along with some very rudimentary User-Agent break-down to filter out most of the automated traffic.

page_counter = Counter("arcology_page", "Hit counter for each page", ["site", "page", "status", "agent_type"])
render_latency = Histogram("arcology_page_render_seconds", "Latency for render_page func.", ["page", "site", "agent_type"])

from arcology.agent_utils import AgentClassification
from django.template import loader

def render_page(request, site, full_key):
    agent = AgentClassification.from_request(request)

    with render_latency.labels(page=full_key, site=site.key, agent_type=agent).time():
        try:
            the_page = Page.objects.get(route_key=full_key)
        except Page.DoesNotExist:
            page_counter.labels(page=full_key, status=404, site=site.key, agent_type=agent).inc() 
            template = loader.get_template("404.html")
            context = dict(
                missing_key=full_key
            )
            return HttpResponseNotFound(
                template.render(context, request)
            )
        links = the_page.collect_links()
        page_html = the_page.to_html(links)

        feeds = site.feed_set.all()

        page_counter.labels(page=full_key, status=200, site=site.key, agent_type=agent).inc()

        return render(request, "arcology/page.html", dict(
            site=site,
            page=the_page,
            feeds=feeds,

            head_title=f"{the_page.title} - {site.title}",
            html_content=page_html,

            backlinks=the_page.collect_backlinks(),
            keywords=the_page.collect_keywords().all(),
            references=the_page.collect_references(),
            tags=the_page.collect_tags(),
        ))

arcology/page.html extends app.html to embed the Org page and its metadata

The page template extends the app template defined below, which provides four blocks to inject content into:

{% extends "arcology/app.html" %}

The tab title is assembled from the page and site title:

{% block title %}{{ head_title }}{% endblock %}

If the site has any feeds, they're injected into the <head> along with any particular web-crawler rules.

{% block extra_head %}
  {% for feed in feeds %}
    <link rel="alternate" type="application/atom+xml" href="{{ feed.url }}" title="{{ feed.title }}" />
  {% endfor %}
  {% if not page.allow_crawl %}
    <meta name="robots" content="noarchive noimageindex noindex nofollow"/>
  {% else %}
    <meta name="robots" content=""/>
  {% endif %}
{% endblock %}

The main content block contains the <main> generated by the native parser, plus a sidebar containing backlinks, page metadata, and other crap.

{% load cache %}
{% block content %}
  {# HTML is sent through without HTML Escaping via | safe #}
  {{ html_content | safe }}

  {% cache 604800 sidebar page.file.digest %}
  <section class="sidebar">
    {% if backlinks|length > 0 %}
    <div class="backlinks">
      <h3>Pages Linking Here</h3>
      <ul class="backlinks">
        {% for backlink in backlinks %}
          <li>{{ backlink.to_backlink_html|safe }}</li>
        {% endfor %}
      </ul>
    </div>
    {% endif %}

    {% if tags|length > 0 %}
    <div class="tags">
      <h3>Page Tags</h3>
      <ul class="tags">
        {% for tag in tags %}
          <li><a href="/tags/{{ tag.tag }}">{{tag.tag}}</a></li>
        {% endfor %}
      </ul>
    </div>
    {% endif %}

    {% if references|length > 0 %}
    <div class="references">
      <h3>External References</h3>
      <ul class="references">
        {% for ref in references %}
          <li><a target="_blank" href="{{ ref.ref }}">{{ref.ref}}</a></li>
        {% endfor %}
      </ul>
    </div>
    {% endif %}

    {% if keywords|length > 0 %}
    <div class="keywords">
      <h3>Page Metadata Keywords</h3>
      <ul class="keywords">
        {% for keyword in keywords %}
          <pre>#+{{ keyword.keyword }}: {{ keyword.value }}</pre>
        {% endfor %}
      </ul>
    </div>
    {% endif %}
  </section>
  {% endcache %}
{% endblock %}

Here's a really simple 404 template, too.

{% extends "arcology/app.html" %}

{% block title %}Page Not Found{% endblock %}
{% block h1 %}<h1>Page Not Found</h1>{% endblock %}

{% block content %}
<section>
  <p>
    The page you tried to open either has not been written by the
    author or the author has chosen to not publish it at this
    time. Please contact the author and include the URL of both the
    page you clicked the link on, as well as the link you&apos;d like
    to read. You may just want
    to <a href="javascript:history.back()">Go Back</a>, too.
  </p>

  <p>
    If you&apos;re interested in a particular reference, you might of
    course have more luck using a public search engine
    like <a href="https://duckduckgo.com">DuckDuckGo</a>
    or <a href="https://kagi.com">Kagi</a>.
  </p>

  <pre>MISSING KEY = {{ missing_key }}</pre>

</section>
{% endblock %}

Org Page-specific CSS Stylings

Most of the page CSS is defined below as part of the app.html, but the content-specific CSS is here, nearer the actual implementation of the flexbox above.

.content {
  margin-left: auto;
  margin-right: auto;
  padding: 1em;
  padding-top: 0;
  display: flex;
  flex-flow: row wrap;
  max-width: 120ch;
}

.content > section, main {
  display: inline-block;
  flex-grow: 1;
  flex-shrink: 1;
  flex-basis: 40em;
  padding: 1em;
  overflow: auto;
}

.content > section.sidebar {
  flex-grow: 0;
  flex-shrink: 1;
  flex-basis: 30ch;
}

The sidebar itself is a vertical flexbox, pushing everything but the backlinks towards the bottom of the page.

section.sidebar {
  display: flex;
  flex-flow: column wrap;
}

section.sidebar > div.backlinks {
  flex-grow: 1;
}

Here are some hacks to put a line between the main content flexbox and the sidebar. I'm not sure I'll keep this, but it's nice to have a delimiter.

.content::before {
    align-self: stretch;
    content: '';
    border: 1px dotted var(--medium-gray);
    margin-top: 1em;
    margin-bottom: 1em;
}
.content > *:first-child {
    order: -1;
}

And some simple image wrangling:

.content img {
  display: block;
  width: 80%;
  margin: 0 auto;
}

These rules annotate task headings by inserting an icon before them.

.task.task-DONE::before {
  content: '☑️ ';
}

.task.task-NEXT::before {
  content: '🆕 ';
}

.task.task-INPROGRESS::before {
  content: '⏳ ';
}

This will display the header arguments to org-babel source blocks: You're staring right at one!

span.babel-args {
    text-align: right;
    display: block;
    background: var(--light-gray);
    margin-bottom: 0;
}

pre.src {
    border-top: 1px solid var(--black);
    background-color: var(--light-gray);
    font-style: normal;
    overflow: scroll;
    margin-top: 0;

    padding-top: 1em;
    padding-left: 0.5em;
    padding-bottom: 1em;
    padding-right: 0.5em;
}

Atom Feed Handler

  • State "INPROGRESS" from "NEXT" [2024-02-04 Sun 23:48]

This uses the sub-feature of the HTML exporter in The arroyo_rs Native Org Parser to export only certain sub-headings. The FeedEntry objects defined above are used to construct the feed. I do some gnarly stuff here, including registering a custom Django template filter so that I can pass a few node-ID -> $thing maps into the template and reach into those dicts when rendering the feed entries, instead of shaping all of that in the handler. But it can't be helped.

import arrow 
import roam.models

def feed(request, key):
    # Get the site and construct the route key
    site = Site.from_request(request)
    if site.key == "localhost":
        full_key = key
        new_site_key = key.split("/")[0]
        site = Site.objects.filter(key=new_site_key).first()
    else:
        full_key = f"{site.key}/{key}"

    # Fetch page metadata
    the_feed    = get_object_or_404(Feed, route_key=full_key)
    entries     = the_feed.feedentry_set.order_by("-pubdate").all()[:10]

    if len(entries) == 0:
        raise Http404()

    try:
        page_author = roam.models.Keyword.objects.get(keyword="AUTHOR", path=the_feed.file).value
    except roam.models.Keyword.DoesNotExist:
        logger.warn(f"Feed {key} does not have an AUTHOR!")
        page_author = "Arcology User"

    page_url    = the_feed.file.page_set.first().to_url()
    updated_at  = arrow.get(entries[0].pubdate).format(arrow.FORMAT_RFC3339) # entries is already sorted

    # node-id -> URL
    links = the_feed.file.page_set.first().collect_links()
    # node-id -> HTML
    html_map = {
        entry.heading.node_id: entry.to_html(links=links) for entry in entries
    }
    # node-id -> PUBDATE heading property
    pubdate_map = {
        entry.heading.node_id: arrow.get(entry.pubdate).format(arrow.FORMAT_RFC3339) for entry in entries
    }

    # return HttpResponse("",content_type="application/atom+xml")
    return render(request, "arcology/feed.xml", dict(
        title=the_feed.title,
        page_url=page_url,
        author=page_author,
        updated_at=updated_at,

        feed_entries=entries,
        htmls=html_map,
        pubdates=pubdate_map,
        links=links,
    ), content_type="application/atom+xml")

An Atom feed is pretty simple: it's an XML document with multiple <entry> elements and the metadata we collected above. For once I'm glad that Django templating treats strings as HTML-unsafe and escapes the generated HTML used in the <summary> for me. This bit me in the past with the FastAPI version: the stuff that goes inside of type="html" elements isn't necessarily valid XML, so it needs to get escaped.

<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">

  <title>{{ title }}</title>
  <link href="{{ page_url }}"/>
  <updated>{{ updated_at }}</updated>
  <author>
    <name>{{ author }}</name>
  </author>
  <id>{{ page_url }}</id>

{% for entry in feed_entries %}
  <entry>
    <title>{{ entry.title }}</title>
    <link href="{{ links | get_item:entry.heading.node_id }}"/>
    <id>urn:uid:{{ entry.heading.node_id }}</id>
    <updated>{{ pubdates | get_item:entry.heading.node_id }}</updated>
    <summary type="html">{{ htmls | get_item:entry.heading.node_id }}</summary>
  </entry>
{% endfor %}

</feed>

NEXT add category/tags to the entries

NEXT move this function to somewhere else more reasonable

This template relies on this custom Django template filter I nicked from StackOverflow to access a dict with a variable key.

from django.template.defaulttags import register

@register.filter
def get_item(dictionary, key):
    return dictionary.get(key)

CANCELLED [#A] see if the IDs are consistent with the old generator

  • State "CANCELLED" from "NEXT" [2024-02-26 Mon 17:46]

404 unpublished/not found endpoint

There are plenty of links inside the Arcology which aren't meant to be clicked; roam: stub links will, of course, end up here.

def unpublished(request):
    key = request.GET.get("key")
    if key is None:
        key = "NOT_SUPPLIED"

    # query links etc to create a JSON doc for SigmaJS
    template = loader.get_template("404.html")
    context = dict(
        missing_key=key
    )
    return HttpResponseNotFound(
        template.render(context, request)
    )

GET /robots.txt Endpoint

robots.txt implements the Robots Exclusion Protocol, a standard used by websites to indicate to visiting web crawlers and other web robots which portions of the website they are allowed to visit.

  • Disallow all GPT-alikes on all pages; I will add more to this list as necessary. Probably will pull these into Arcology Project Configuration sooner or later.
  • Show all pages with a truthy ARCOLOGY_ALLOW_CRAWL roam.models.Keyword.
  • If we're on local development, show all pages; otherwise only the ones for the site being queried.

def robots(request):
    site = Site.from_request(request)
    public_pages = Page.objects \
                       .filter(allow_crawl=True)
    if site.key != "localhost":
        public_pages = public_pages \
                       .filter(site=site)

    public_pages = public_pages.all()
    return render(request, "arcology/robots.txt", dict(
        disallow_all_agents=["GPTBot", "ChatGPT-User", "Google-Extended", "CCBot", "anthropic-ai"],
        pages=public_pages,
    ), content_type="text/plain")

Those values are passed to the Django template:

{% for agent in disallow_all_agents %}
User-agent: {{ agent }}
Disallow: /
{% endfor %}

User-agent: *
Disallow: /
{% for page in pages %}Allow: {{ page.to_url_path }}
{% endfor %}

GET /feeds.json Feed discovery endpoint

CLOCK: [2024-02-15 Thu 14:17][2024-02-15 Thu 14:41] => 0:24

import json
def feed_list(request):
    site = Site.from_request(request)
    feeds = Feed.objects.all()
    ret = [
        dict(
            key=feed.route_key,
            url=feed.site.urlize_feed(feed),
            title=feed.title,
            site=feed.site.key,
            visibility=feed.visibility,
        )
        for feed in feeds
    ]

    return HttpResponse(json.dumps(ret), content_type="application/json")

GET /sites.css Per-Site link color dynamic CSS endpoint

This endpoint generates a dynamic CSS file that colorizes internal links based on The Arcology's Site List stored in the database. It does something extremely wicked to make the page links less jarring until you hover over them: it fakes an alpha channel by appending to the stored color.

def site_css(request):
    sites = Site.objects.all()
    stanzas = []
    for site in sites:
      for domain in site.sitedomain_set.all():
        stanzas.append(f'''
          a[href*="//{domain.domain}"] {{
            border-radius: 0.25em;
            padding: 0.1em;
            background-color: {site.link_color}66;
          }}
          a[href*="//{domain.domain}"]:hover {{
            background-color: {site.link_color}FF !important;
          }}
        ''')
    stanzas.append(f'''
      a[href*="/404"] {{
        color: var(--alert);
        /* text-decoration: line-through; */
      }}
      a[href*="/404"]::after {{
        content: " ⚠";
      }}
      a[href*="/404"]::before {{
        content: "⚠ ";
      }}
    ''')
    return HttpResponse(stanzas, content_type="text/css")
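
The trick in isolation: an eight-digit hex color is RGBA, so appending "66" (roughly 40% alpha) to the stored #RRGGBB link_color gives a translucent resting background, and "FF" restores full opacity on hover. A tiny illustration with a made-up color:

link_color = "#4477aa"       # illustrative; real values come from the Site rows
resting = f"{link_color}66"  # "#4477aa66" -- translucent
hover = f"{link_color}FF"    # "#4477aaFF" -- fully opaque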

app.html Arcology Site Templates

In short, there are four blocks that the page template and other templates will use to embed content in the rendered web page:

  • title is the <title> element, the name of the tab.
  • h1 is the displayed site/page title and only needs to be extended if some page wants to do something strange (like site index pages only showing the site title)
  • extra_head is inside <head> and can be used to stuff more metadata in there
  • content is where the content goes.

For now it's largely lifted from the Base HTML Template and Page HTML Templates from the FastAPI prototype, with some nips and tucks to make it more streamlined and legible.

<!DOCTYPE html>
<html>
  <head>

The base template provides some basic information and loads the CSS sheets necessary to make things look nice, along with some page and author metadata. It provides a template block extra_head so that child templates can shove more <head> elements in here.

  {% load static %}
  {% load django_htmx %}
  <link rel="stylesheet" href="{% static 'arcology/css/app.css' %}"/>
  <link rel="stylesheet" href="{% static 'arcology/css/vulf.css' %}"/>
  <link rel="stylesheet" href="{% static 'arcology/css/default-colors.css' %}"/>
  <link rel="stylesheet" href="{% url 'site-css' %}"/>
  {% if site and site.css_file %}
  <link rel="stylesheet" href="{% static site.css_file %}"/>
  {% endif %}
  <meta name="author" content="Ryan Rix"/>
  <meta name="generator" content="Arcology Site Engine https://engine.arcology.garden/"/>
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>{% block title %}{{head_title | default:"The Arcology Project" }}{% endblock %}</title>
  {% block extra_head %}{% endblock %}
</head>

The body consists of a header which has the site and page title (which can be overridden for example in the index handler to only show the site title) and links to the other sites. These should be loaded from the DB eventually.

<body>
  <header>
    <div class="header-content">
      {% block h1 %}
      <h1><a href='/'>{{ site.title }}</a></h1>
      <h2>{{ page.title }}</h2>
      {% endblock %}
      <div>
        &bull; <a class="internal" href="https://thelionsrear.com">Life</a>
        &bull; <a class="internal" href="https://arcology.garden">Tech</a>
        &bull; <a class="internal" href="https://cce.whatthefuck.computer">Emacs</a>
        &bull; <a class="internal" href="https://engine.arcology.garden">Arcology</a>
        &bull;
      </div>
    </div>
  </header>

The content block is used in child templates to hold a <main>; the content div should really be a <main> element itself, but The arroyo_rs Native Org Parser wants to output a <main> and I'm not going to stop it, so the div is there to make the body's flexbox layout work.

<div class="content">
  {% block content %}{% endblock %}
</div>

A footer contains the oh-so-important copyright notice and a limited privacy policy which I should update before I ship this, along with links to the sitemap and to my fediring neighbors.

<footer>
  <hr/>
  &copy; 02024 <a href="https://arcology.garden/people/rrix">Ryan Rix</a> &lt;<a href="mailto:site@whatthefuck.computer">site@whatthefuck.computer</a>&gt;

  <br/>

  <p>
    Care has been taken to publish accurate information to
    long-lived URLs, but context and content as well as URLs may
    change without notice.
  </p>

  <p>
    This site collects no personal information from visitors, nor
    stores any identifying tokens. If you or your personal
    information ended up in public notes please email me for
    correction or removal. A single bit cookie may be stored on
    your device if you choose to change appearance settings below.
  </p>

  <p>
    Email me with questions, comments, insights, kind criticism.
    blow horn, good luck.
  </p>

  <p>
    View the <a href="/sitemap">Site Map</a> or the <a href="/tags">Tag Index</a>.
  </p>

  <p>
    <a href="https://fediring.net/previous?host=arcology.garden">&larr;</a>
    <a href="https://fediring.net/">Fediring</a>
    <a href="https://fediring.net/next?host=arcology.garden">&rarr;</a>
  </p>

The FastAPI site had a "boredom mode" which would disable fonts and colors because some nerds were mean to me. This one will not have that until some nerds are mean to me.

      <!--
          <p>
            <input type="checkbox" id="boredom-mode"><label for="boredom-mode">I do not like your aesthetic sensibilities!!</label>
          </p>

          <script type="text/javascript">
            <<boredom>>
          </script>
          -->
    </footer>
  </body>
</html>

CSS

This will be extended.

Rather than using emoji for each site, it would be nice to subtly color them based on the link_color… I will need to Do Some Bullshit to make that work, though, maybe.

body {
    font-family: "Vulf Mono", monospace;
    font-style: italic;
    font-size: medium;
    background-color: var(--white);
    color: var(--black);
    margin: 0;
}
header {
  background-color: var(--light-gray);
  border-radius: 0.25em;
  margin-top: 0;
  border-bottom: 2px solid var(--dark-gray);
}

header > .header-content {
  padding: 1em;
  max-width: 120ch;
  margin-left: auto;
  margin-right: auto;
}

header h1, header h2 {
  margin-top: 0;
  display: inline;
}

header h2:before {
  content: " — ";
}
footer {
  margin-left: auto;
  margin-right: auto;
  max-width: 120ch;
  font-size: smaller;
  text-align: center;
}

footer a {
  font-weight: 500;
}
a {
  color: var(--primary);
}

a:visited {
  color: var(--secondary);
}

code {
  font-style: normal;
}

There are per-site CSS files in The Arcology's Site List.

Generating @font-face rules for a bunch of fonts

Vulfpeck Fonts are pulled in with this code generation because writing @font-face rules does not bring joy, and I don't have the right to redistribute these files, so I won't check them in at all.

| VulfSans | Regular      | 500 |        |
| VulfMono | Regular      | 500 |        |
| VulfSans | Bold         | 800 |        |
| VulfMono | Bold         | 800 |        |
| VulfSans | Italic       | 500 | italic |
| VulfMono | Italic       | 500 | italic |
| VulfSans | Bold_Italic  | 800 | italic |
| VulfMono | Bold_Italic  | 800 | italic |
| VulfSans | Light        | 300 |        |
| VulfMono | Light        | 300 |        |
| VulfSans | Light_Italic | 500 | italic |
| VulfMono | Light_Italic | 500 | italic |
(with-temp-buffer
  (-map (pcase-lambda (`(,first ,second ,weight ,style))
          (insert
           (s-join "\n" (list
                         "@font-face {"
                         "font-family: "  (if (equal first "VulfMono")
                                              "\"Vulf Mono\""
                                            "\"Vulf Sans\"")
                         "; src:"
                         (concat "url('/static/arcology/fonts/" first "-" second ".woff') format('woff'),")
                         (concat "url('/static/arcology/fonts/" first "-" second ".woff2') format('woff2'),")
                         (concat "url('/static/arcology/fonts/" first "-" second ".ttf') format('truetype');")
                         "font-weight: " (number-to-string weight) ";"
                         (unless (equal style "")
                           (concat "font-style: " style ";"))
                         "}"))))
        tbl)
  (write-file "~/org/arcology-django/arcology/static/arcology/css/vulf.css"))

NEXT this is a lever for restructuring the arcology

The app.html template would be provided by a configuration-module repo that a user sets up from a template; that repo would depend on the arroyo, arcology, and roam modules, be responsible for setting up gunicorn and the like, and also provide the command-line wrapper.

NEXT Testing

  • Site from_request and from_route need to be tested (a sketch of such a test follows this list)
  • site urlize page function needs to be tested too
  • page collect functions at least need type annotations…
  • to_html instance method needs to be tested (and the memoization too)
  • create_from_arroyo too
  • feed and feedentry

    • both the create_from_arroyo, to_html
  • the feed generator stuff in the view probably should go into a model class, but test it.
  • page handler view logic, test that 404s work, check that localhost loads work

    • check optional sidebar stuff in the view logic
  • sitemap when I write it
  • per-site link color css endpoint
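
Here is a hedged sketch of the first item in that list as a Django TestCase; the key, domain, and colors are illustrative seed values, not the real site list:

from django.test import RequestFactory, TestCase

from arcology.models import Site, SiteDomain

class SiteRoutingTests(TestCase):
    def setUp(self):
        self.site = Site.objects.create(
            key="garden",
            title="Arcology Garden",
            css_file="arcology/css/garden.css",
            link_color="#4477aa",
        )
        SiteDomain.objects.create(site=self.site, domain="arcology.garden")

    def test_from_route_uses_first_key_segment(self):
        self.assertEqual(Site.from_route("garden/index"), self.site)

    def test_from_request_resolves_host_header(self):
        request = RequestFactory().get("/", HTTP_HOST="arcology.garden")
        self.assertEqual(Site.from_request(request), self.site)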