memoize the HTML export calls

make sure we are ignoring tags properly
2024-02-04 22:31:54 -08:00 · 2024-02-04 22:31:22 -08:00
5 changed files with 68 additions and 37 deletions
--- a/arcology.org
+++ b/arcology.org
@ -13,6 +13,7 @@ from typing import Optional, List
 from django.db import models
 from django.conf import settings
 import arrow
+import functools

 import arroyo.arroyo_rs as native

@ -22,6 +23,11 @@ import logging

 logger = logging.getLogger(__name__)
 logger.setLevel(logging.WARN)
+
+# used for some memoization
+class hashabledict(dict):
+    def __hash__(self):
+        return hash(tuple(sorted(self.items())))
 #+end_src

 ** Site
@ -175,13 +181,20 @@ class Page(models.Model):
        my_headings = self.file.heading_set.all()
        return set(roam.models.Link.objects.filter(dest_heading__in=my_headings))

+    def to_html(self, links, heading=None, include_subheadings=False):
+        return self._to_html_memoized(hashabledict(links), heading, include_subheadings, self.file.digest)

-    def to_html(self, links, headings=[], include_subheadings=False):
+    @functools.lru_cache(maxsize=500)
+    def _to_html_memoized(self, links, heading, include_subheadings, _file_digest):
+        if heading is not None:
+            headings = [heading]
+        else:
+            headings = []
        opts = native.ExportOptions(
            link_retargets=links,
            limit_headings=headings,
            include_subheadings=include_subheadings,
-            ignore_tags=[],
+            ignore_tags=settings.IGNORED_ROAM_TAGS,
        )
        return native.htmlize_file(self.file.path, opts)

@ -346,23 +359,6 @@ migrations.CreateModel(

 A FeedEntry is a Heading with a PUBDATE property that exists on a page with an ARCOLOGY_FEED keyword.

-#+begin_src python
-feed_kws = roam.models.Keyword.objects.filter(value="garden/updates.xml", keyword="ARCOLOGY_FEED")
-
-headings = [
-    item
-    for kw in feed_kws
-    for item in kw.path.heading_set \
-      .filter(headingproperty__keyword="PUBDATE") \
-      .exclude(tag__tag__in=["noexport", "NOEXPORT"]) \
-      .all()
-]
-
-[
-    h.title for h in headings
-]
-#+end_src
-
 #+begin_src python :tangle arcology/models.py
 class FeedEntry(models.Model):
    POST_VISIBILITY = [
@ -390,11 +386,15 @@ class FeedEntry(models.Model):
    pubdate = models.DateTimeField(auto_now=False)

    def to_html(self, links):
+        return self._to_html_memoized(hashabledict(links), self.heading.path.digest)
+
+    @functools.lru_cache(maxsize=500)
+    def _to_html_memoized(self, links, _file_digest):
        opts = native.ExportOptions(
            link_retargets=links,
            limit_headings=[self.heading.node_id],
            include_subheadings=True,
-            ignore_tags=[],
+            ignore_tags=settings.IGNORED_ROAM_TAGS,
        )
        return native.htmlize_file(self.heading.path.path, opts)

@ -738,14 +738,14 @@ def sitemap(request):

 ** NEXT feed

-all the pandoc based feed generator stuff will need to be recreated or
-bodged in, and all that probably should go in its own django app.
+This uses the HTML exporter's ability to export only certain sub-headings, provided by [[id:20231023T115950.248543][The arroyo_rs Native Org Parser]]. The =FeedEntry= objects defined above are used to construct the feed. I do some gnarly stuff, including stuffing a custom Django template filter in there, so that I can keep a bunch of =node ID= -> =$thing= maps; when I make the feed entries I can just reach into a few dicts instead of shaping all of that in the handler.

 #+begin_src python :tangle arcology/views.py
 import arrow 
 import roam.models

 def feed(request, key):
+    # Get the site and construct the route key
    site = Site.from_request(request)
    if site.key == "localhost":
        full_key = key
@ -753,31 +753,35 @@ def feed(request, key):
        site = Site.objects.filter(key=new_site_key).first()
    else:
        full_key = f"{site.key}/{key}"
-    logger.warn(site)

-    the_feed = get_object_or_404(Feed, route_key=full_key)
+    # Fetch page metadata
+    the_feed    = get_object_or_404(Feed, route_key=full_key)
+    entries     = the_feed.feedentry_set.order_by("-pubdate").all()[:10]
+    page_author = roam.models.Keyword.objects.get(keyword="AUTHOR", path=the_feed.file).value
+    page_url    = the_feed.file.page_set.first().urlize_self()
+    updated_at  = arrow.get(entries[0].pubdate).format(arrow.FORMAT_RFC3339) # entries is already sorted
+
+    # node-id -> URL
    links = the_feed.file.page_set.first().collect_links()
-    entries = the_feed.feedentry_set.order_by("-pubdate").all()[:10]
+    # node-id -> HTML
    html_map = {
        entry.heading.node_id: entry.to_html(links=links) for entry in entries
    }
+    # node-id -> PUBDATE heading property
    pubdate_map = {
        entry.heading.node_id: arrow.get(entry.pubdate).format(arrow.FORMAT_RFC3339) for entry in entries
    }

-    page_author = roam.models.Keyword.objects.get(keyword="AUTHOR", path=the_feed.file).value
-
    # return HttpResponse("",content_type="application/atom+xml")
    return render(request, "arcology/feed.xml", dict(
-        title="Test",
-        page_url=the_feed.file.page_set.first().urlize_self(),
+        title=the_feed.title,
+        page_url=page_url,
        author=page_author,
-        updated_at=arrow.get(entries[0].pubdate).format(arrow.FORMAT_RFC3339),
+        updated_at=updated_at,

        feed_entries=entries,
        htmls=html_map,
        pubdates=pubdate_map,
-
        links=links,
    ), content_type="application/atom+xml")

--- a/arcology/models.py
+++ b/arcology/models.py
@ -115,7 +115,7 @@ class Page(models.Model):
            link_retargets=links,
            limit_headings=headings,
            include_subheadings=include_subheadings,
-            ignore_tags=[],
+            ignore_tags=settings.IGNORED_ROAM_TAGS,
        )
        return native.htmlize_file(self.file.path, opts)

@ -227,7 +227,7 @@ class FeedEntry(models.Model):
            link_retargets=links,
            limit_headings=[self.heading.node_id],
            include_subheadings=True,
-            ignore_tags=[],
+            ignore_tags=settings.IGNORED_ROAM_TAGS,
        )
        return native.htmlize_file(self.heading.path.path, opts)

--- a/arcology/settings/generators.py
+++ b/arcology/settings/generators.py
@ -74,3 +74,12 @@ ROAM_ALLOWED_KEYWORDS = [
    "ARROYO_MODULE_DEP",
 ]
 # The Code:1 ends here
+
+# [[file:../../configuration.org::*The Code][The Code:1]]
+IGNORED_ROAM_TAGS = [
+  "NOEXPORT", # nil
+  "noexport", # nil
+  "Private", # nil
+  
+]
+# The Code:1 ends here
--- a/configuration.org
+++ b/configuration.org
@ -363,6 +363,24 @@ ROAM_ALLOWED_KEYWORDS = [
 ]
 #+END_SRC

+** Ignored Roam Tags
+
+Headings with these tags will not be exported.
+
+#+NAME: ignored_roam_tags
+| NOEXPORT |
+| noexport |
+| Private  |
+
+*** The Code
+
+#+BEGIN_SRC python :tangle arcology/settings/generators.py :noweb yes
+IGNORED_ROAM_TAGS = [
+  <<gen-config-list(ignored_roam_tags)>>
+]
+#+END_SRC
+
+
 * Look don't worry about the rest of these

 #+BEGIN_SRC python :tangle arcology/settings/__init__.py :noweb yes
--- a/flake.lock
+++ b/flake.lock
@ -6,11 +6,11 @@
        "nixpkgs": "nixpkgs"
      },
      "locked": {
-        "lastModified": 1707101614,
-        "narHash": "sha256-ZZbBX1/B0eku2Ww2luX4MzSIP5gONa6Tf2mmkmdQ2Ss=",
+        "lastModified": 1707114472,
+        "narHash": "sha256-yYDz01/0m1ObUU6ULCg1itjKnJWMu02GhG0E4etMA+0=",
        "ref": "refs/heads/main",
-        "rev": "52258e2db7047335a9ee7b51121960b394fb3619",
-        "revCount": 146,
+        "rev": "f890e941bbcb4e3402a8c23464373badf1180065",
+        "revCount": 148,
        "type": "git",
        "url": "https://code.rix.si/rrix/arroyo"
      },
Author	SHA1	Message	Date
Ryan Rix	61fef83735	memoize the HTML export calls	2024-02-04 22:31:54 -08:00
Ryan Rix	3919e79f51	make sure we are ignoring tags properly	2024-02-04 22:31:22 -08:00