cache the sitemap.json response

add caching to org page views and feeds and sidebar
add a cache decorate that uses the django caching framework
2024-03-05 13:22:09 -08:00 · 2024-03-05 13:21:44 -08:00 · 2024-03-05 13:21:23 -08:00
12 changed files with 165 additions and 38 deletions
--- a/arcology.org
+++ b/arcology.org
@ -20,9 +20,9 @@ from django.conf import settings
 from django_prometheus.models import ExportModelOperationsMixin as EMOM

 import arrow
-import functools

 import arroyo.arroyo_rs as native
+from arcology.cache_decorator import cache

 import roam.models

@ -210,7 +210,7 @@ class Page(EMOM('page'), models.Model):
    def to_html(self, links, heading=None, include_subheadings=False):
        return self._to_html_memoized(hashabledict(links), heading, include_subheadings, self.file.digest)

-    @functools.lru_cache(maxsize=500)
+    @cache(key_prefix="page_html", expire_secs=60*60*24*7)
    def _to_html_memoized(self, links, heading, include_subheadings, _file_digest):
        if heading is not None:
            headings = [heading]
@ -410,7 +410,7 @@ class FeedEntry(EMOM('feed_entry'), models.Model):
    def to_html(self, links):
        return self._to_html_memoized(hashabledict(links), self.heading.path.digest)

-    @functools.lru_cache(maxsize=500)
+    @cache(key_prefix="feedentry_html", expire_secs=60*60*24*7)
    def _to_html_memoized(self, links, _file_digest):
        opts = native.ExportOptions(
            link_retargets=links,
@ -679,10 +679,12 @@ If the site has any feeds, they're injected in to the =<head>= along with any pa
 The main =content= block contains the =<main>= generated by the native parser, and a sidebar containing backlinks, and page metadata, and other crap.

 #+begin_src jinja2 :tangle arcology/templates/arcology/page.html
+{% load cache %}
 {% block content %}
  {# HTML is sent through without HTML Escaping via | safe #}
  {{ html_content | safe }}

+  {% cache 604800 sidebar the_page.hash %}
  <section class="sidebar">
    {% if backlinks|length > 0 %}
    <div class="backlinks">
@ -728,6 +730,7 @@ The main =content= block contains the =<main>= generated by the native parser, a
    </div>
    {% endif %}
  </section>
+  {% endcache %}
 {% endblock %}
 #+end_src

--- a/arcology/cache_decorator.py
+++ b/arcology/cache_decorator.py
@ -0,0 +1,29 @@
+# [[file:../scaffolding.org::*File-backed HTML/Atom cache][File-backed HTML/Atom cache:2]]
+import pathlib
+from django.core.cache import caches
+
+import logging
+logger = logging.getLogger(__name__)
+
+def cache(key_prefix="", cache_connection="default", expire_secs=600):
+  def return_decoration(func):
+
+    def wrapper(*args, **kwargs):
+      cache = caches["default"]
+      key = args
+      for k, v in kwargs.items():
+        key += tuple(k,v)
+      cache_key = f"{key_prefix}/{hash(key)}"
+
+      ret = cache.get(cache_key)
+      if ret is None:
+        logger.debug("cache_miss")
+        ret = func(*args, **kwargs)
+        cache.set(cache_key, ret, expire_secs)
+      else:
+        logger.debug("cache_hit")
+      return ret
+
+    return wrapper
+  return return_decoration
+# File-backed HTML/Atom cache:2 ends here
--- a/arcology/models.py
+++ b/arcology/models.py
@ -6,9 +6,9 @@ from django.conf import settings
 from django_prometheus.models import ExportModelOperationsMixin as EMOM

 import arrow
-import functools

 import arroyo.arroyo_rs as native
+from arcology.cache_decorator import cache

 import roam.models

@ -139,7 +139,7 @@ class Page(EMOM('page'), models.Model):
    def to_html(self, links, heading=None, include_subheadings=False):
        return self._to_html_memoized(hashabledict(links), heading, include_subheadings, self.file.digest)

-    @functools.lru_cache(maxsize=500)
+    @cache(key_prefix="page_html", expire_secs=60*60*24*7)
    def _to_html_memoized(self, links, heading, include_subheadings, _file_digest):
        if heading is not None:
            headings = [heading]
@ -255,7 +255,7 @@ class FeedEntry(EMOM('feed_entry'), models.Model):
    def to_html(self, links):
        return self._to_html_memoized(hashabledict(links), self.heading.path.digest)

-    @functools.lru_cache(maxsize=500)
+    @cache(key_prefix="feedentry_html", expire_secs=60*60*24*7)
    def _to_html_memoized(self, links, _file_digest):
        opts = native.ExportOptions(
            link_retargets=links,
--- a/arcology/settings/__init__.py
+++ b/arcology/settings/__init__.py
@ -22,6 +22,16 @@ DEBUG = (ARCOLOGY_ENVIRONMENT != "production")
 SYNCTHING_KEY = os.getenv("ARCOLOGY_SYNCTHING_KEY")
 # Environment Variables:3 ends here

+# [[file:../../configuration.org::*Environment Variables][Environment Variables:4]]
+BASE_CACHE_PATH = os.environ.get("ARCOLOGY_CACHE_PATH", '/var/tmp/django_cache')
+CACHES = {
+    'default': {
+        'BACKEND': 'django_prometheus.cache.backends.filebased.FileBasedCache',
+        'LOCATION': BASE_CACHE_PATH,
+    },
+}
+# Environment Variables:4 ends here
+
 # [[file:../../configuration.org::*Hostname configuration from =arcology.model.Site=, eventually][Hostname configuration from =arcology.model.Site=, eventually:1]]
 ALLOWED_HOSTS = "thelionsrear.com,rix.si,arcology.garden,whatthefuck.computer,cce.whatthefuck.computer,cce.rix.si,engine.arcology.garden,127.0.0.1,localhost,v2.thelionsrear.com,v2.arcology.garden,cce2.whatthefuck.computer,engine2.arcology.garden".split(',')
 # Hostname configuration from =arcology.model.Site=, eventually:1 ends here
@ -71,6 +81,10 @@ LOGGING = {
            "handlers": ["console"],
            "level": os.getenv("DJANGO_LOG_LEVEL", "INFO"),
            "propagate": False,
+        },
+        "arcology.cache_decorator": { # left as an example to change later.
+            "handlers": ["console"],
+            "level": "INFO",
        }
    },
    "root": {
@ -94,13 +108,6 @@ MIDDLEWARE = [
    "django_prometheus.middleware.PrometheusAfterMiddleware",
 ]

-CACHES = {
-    'default': {
-        'BACKEND': 'django_prometheus.cache.backends.filebased.FileBasedCache',
-        'LOCATION': '/var/tmp/django_cache',
-    }
-}
-
 ROOT_URLCONF = "arcology.urls"

 TEMPLATES = [
--- a/arcology/templates/404.html
+++ b/arcology/templates/404.html
@ -1,4 +1,4 @@
-{# [[file:../../arcology.org::*Rendering the converted Org HTML in to a whole web-page][Rendering the converted Org HTML in to a whole web-page:5]] #}
+{# [[file:../../arcology.org::*=arcology/page.html= extends =app.html= to embed the Org page and its metadata][=arcology/page.html= extends =app.html= to embed the Org page and its metadata:5]] #}
 {% extends "arcology/app.html" %}

 {% block title %}Page Not Found{% endblock %}
@ -26,4 +26,4 @@

 </section>
 {% endblock %}
-{# Rendering the converted Org HTML in to a whole web-page:5 ends here #}
+{# =arcology/page.html= extends =app.html= to embed the Org page and its metadata:5 ends here #}
--- a/arcology/templates/arcology/page.html
+++ b/arcology/templates/arcology/page.html
@ -1,12 +1,12 @@
-{# [[file:../../../arcology.org::*Rendering the converted Org HTML in to a whole web-page][Rendering the converted Org HTML in to a whole web-page:1]] #}
+{# [[file:../../../arcology.org::*=arcology/page.html= extends =app.html= to embed the Org page and its metadata][=arcology/page.html= extends =app.html= to embed the Org page and its metadata:1]] #}
 {% extends "arcology/app.html" %}
-{# Rendering the converted Org HTML in to a whole web-page:1 ends here #}
+{# =arcology/page.html= extends =app.html= to embed the Org page and its metadata:1 ends here #}

-{# [[file:../../../arcology.org::*Rendering the converted Org HTML in to a whole web-page][Rendering the converted Org HTML in to a whole web-page:2]] #}
+{# [[file:../../../arcology.org::*=arcology/page.html= extends =app.html= to embed the Org page and its metadata][=arcology/page.html= extends =app.html= to embed the Org page and its metadata:2]] #}
 {% block title %}{{ head_title }}{% endblock %}
-{# Rendering the converted Org HTML in to a whole web-page:2 ends here #}
+{# =arcology/page.html= extends =app.html= to embed the Org page and its metadata:2 ends here #}

-{# [[file:../../../arcology.org::*Rendering the converted Org HTML in to a whole web-page][Rendering the converted Org HTML in to a whole web-page:3]] #}
+{# [[file:../../../arcology.org::*=arcology/page.html= extends =app.html= to embed the Org page and its metadata][=arcology/page.html= extends =app.html= to embed the Org page and its metadata:3]] #}
 {% block extra_head %}
  {% for feed in feeds %}
    <link rel="alternate" type="application/atom+xml" href="{{ feed.url }}" title="{{ feed.title }}" />
@ -17,13 +17,15 @@
    <meta name="robots" content=""/>
  {% endif %}
 {% endblock %}
-{# Rendering the converted Org HTML in to a whole web-page:3 ends here #}
+{# =arcology/page.html= extends =app.html= to embed the Org page and its metadata:3 ends here #}

-{# [[file:../../../arcology.org::*Rendering the converted Org HTML in to a whole web-page][Rendering the converted Org HTML in to a whole web-page:4]] #}
+{# [[file:../../../arcology.org::*=arcology/page.html= extends =app.html= to embed the Org page and its metadata][=arcology/page.html= extends =app.html= to embed the Org page and its metadata:4]] #}
+{% load cache %}
 {% block content %}
  {# HTML is sent through without HTML Escaping via | safe #}
  {{ html_content | safe }}

+  {% cache 604800 sidebar the_page.hash %}
  <section class="sidebar">
    {% if backlinks|length > 0 %}
    <div class="backlinks">
@ -69,5 +71,6 @@
    </div>
    {% endif %}
  </section>
+  {% endcache %}
 {% endblock %}
-{# Rendering the converted Org HTML in to a whole web-page:4 ends here #}
+{# =arcology/page.html= extends =app.html= to embed the Org page and its metadata:4 ends here #}
--- a/configuration.org
+++ b/configuration.org
@ -228,6 +228,18 @@ The =ARCOLOGY_SYNCTHING_KEY= environment variable will be set to the [[id:cce/sy
 SYNCTHING_KEY = os.getenv("ARCOLOGY_SYNCTHING_KEY")
 #+END_SRC

+The =ARCOLOGY_CACHE_PATH= is set to a path that multi-process django can use to cache processed HTML and Atom between processes.
+
+#+BEGIN_SRC python :tangle arcology/settings/__init__.py
+BASE_CACHE_PATH = os.environ.get("ARCOLOGY_CACHE_PATH", '/var/tmp/django_cache')
+CACHES = {
+    'default': {
+        'BACKEND': 'django_prometheus.cache.backends.filebased.FileBasedCache',
+        'LOCATION': BASE_CACHE_PATH,
+    },
+}
+#+END_SRC
+
 ** NEXT Hostname configuration from =arcology.model.Site=, eventually

 When I have the sites organized in an org-mode table, i'll reapproach the hostname list, and probably before then when i want to test domain-based routing.
@ -293,6 +305,10 @@ LOGGING = {
            "handlers": ["console"],
            "level": os.getenv("DJANGO_LOG_LEVEL", "INFO"),
            "propagate": False,
+        },
+        "arcology.cache_decorator": { # left as an example to change later.
+            "handlers": ["console"],
+            "level": "INFO",
        }
    },
    "root": {
@ -455,13 +471,6 @@ MIDDLEWARE = [
    "django_prometheus.middleware.PrometheusAfterMiddleware",
 ]

-CACHES = {
-    'default': {
-        'BACKEND': 'django_prometheus.cache.backends.filebased.FileBasedCache',
-        'LOCATION': '/var/tmp/django_cache',
-    }
-}
-
 ROOT_URLCONF = "arcology.urls"

 TEMPLATES = [
--- a/pyproject.toml
+++ b/pyproject.toml
@ -5,7 +5,11 @@ version = "0.0.1"
 description = "org-mode metadata query engine, publishing platform, and computer metaprogrammer"
 # license = "Hey Smell This"
 readme = "README.md"
-dependencies = ["click ~=8.1", "django ~= 4.2", "django-stub", "polling", "django-prometheus", "arroyo", "arrow ~= 1.3.0", "gunicorn ~= 21.0", "htmx ~= 1.17" ]
+dependencies = [
+    "django ~= 4.2", "django-stub", "django-prometheus",
+    "click ~=8.1", "polling", "arrow ~= 1.3.0", "gunicorn ~= 21.0", "htmx ~= 1.17",
+    "arroyo",
+]
 requires-python = ">=3.10"
 authors = [
    { name = "Ryan Rix", email = "code@whatthefuck.computer" }
--- a/scaffolding.org
+++ b/scaffolding.org
@ -2,6 +2,7 @@
 :ID:       arcology/django/scaffolding
 :END:
 #+TITLE: Arcology Project Scaffolding
+#+filetags: :Project:
 #+ARCOLOGY_KEY: arcology/scaffolding

 * Dev Environment
@ -17,7 +18,11 @@ version = "0.0.1"
 description = "org-mode metadata query engine, publishing platform, and computer metaprogrammer"
 # license = "Hey Smell This"
 readme = "README.md"
-dependencies = ["click ~=8.1", "django ~= 4.2", "django-stub", "polling", "django-prometheus", "arroyo", "arrow ~= 1.3.0", "gunicorn ~= 21.0", "htmx ~= 1.17" ]
+dependencies = [
+    "django ~= 4.2", "django-stub", "django-prometheus",
+    "click ~=8.1", "polling", "arrow ~= 1.3.0", "gunicorn ~= 21.0", "htmx ~= 1.17",
+    "arroyo",
+]
 requires-python = ">=3.10"
 authors = [
    { name = "Ryan Rix", email = "code@whatthefuck.computer" }
@ -408,3 +413,68 @@ class AgentClassification(str, Enum):
                    "    return cls." enum "\n")))
  (apply #'concat))
 #+end_src
+
+** File-backed HTML/Atom cache
+:PROPERTIES:
+:ID:       20240305T122458.841243
+:ROAM_ALIASES: "arcology.file_cache.str_file_cache decorator"
+:END:
+
+I got away with using =functools.lru_cache= with the FastAPI prototype because uvicorn was single-process, but now we're deploying a WSGI app on multi-process =gunicorn= so the memory that the =lru_cache= writes to is not shared between the processes[fn:1:Maybe some day the GIL won't get in the way, alas]. I don't feel that trying to get the Arcology to work as ASGI Django is worth the trouble; there would be too many multi-colored functions duplicated between the sync workers and the async workers.
+
+There are currently four invocations of =lru_cache= in this code-base, and they're all caching big huge strings. It's easier perhaps to swap in a thing which writes those HTML strings to files. The call-sites all have the source-file's =sha256= sum so that those =lru_cache= functions have a cache-breaking key, this can still be a cache-breaking key, just on the filesystem instead. God-speed to whoever deploys the Arcology to a multi-system Kubernetes cluster.
+
+So now you can do this:
+
+#+begin_src python
+from arcology.cache_decorator import cache
+
+@cache(key_prefix="strs")
+def gimme(hk):
+  return "hello, world!"
+
+gimme(1)
+#+end_src
+
+Writing a wrapper like this is sort of funny to look at.
+
+Consider the =@cache(key_prefix="strs")= invocation above.
+- That calls the outer-most function =cache= below, which returns the un-evaluated function =return_decoration=
+- The decorator system then invokes *that*, passing the =gimme= function in to it
+- *that* returns a =wrapper= function which is the thing that is invoked when =gimme(1)= is invoked.
+
+All this nesting is necessary to pass arguments in to the decorator, and to have access to the inner function's arguments to calculate the hash key.
+
+#+begin_src python :tangle arcology/cache_decorator.py
+import pathlib
+from django.core.cache import caches
+
+import logging
+logger = logging.getLogger(__name__)
+
+def cache(key_prefix="", cache_connection="default", expire_secs=600):
+  def return_decoration(func):
+
+    def wrapper(*args, **kwargs):
+      cache = caches["default"]
+      key = args
+      for k, v in kwargs.items():
+        key += tuple(k,v)
+      cache_key = f"{key_prefix}/{hash(key)}"
+
+      ret = cache.get(cache_key)
+      if ret is None:
+        logger.debug("cache_miss")
+        ret = func(*args, **kwargs)
+        cache.set(cache_key, ret, expire_secs)
+      else:
+        logger.debug("cache_hit")
+      return ret
+
+    return wrapper
+  return return_decoration
+#+end_src
+
+*** NEXT I need to make sure to write some code to do cache-invalidation before it becomes a problem, too...
+
+could also just use [[https://www.man7.org/linux/man-pages/man8/systemd-tmpfiles.8.html][=systemd-tmpfiles=]]..!
--- a/sitemap.org
+++ b/sitemap.org
@ -235,7 +235,7 @@ compose together and return in a single dict for JSON rendering shaped like:
 #+begin_src python :tangle sitemap/models.py :mkdirp yes
 import arcology.models
 import roam.models
-import functools
+from arcology.cache_decorator import cache

 import hashlib

@ -247,7 +247,7 @@ def make_loc_hash(page: arcology.models.Page, salt, max_q=700):
 class Node():
  @classmethod
  def make_page_dict(cls, page):
-    @functools.lru_cache(maxsize=5000)
+    @cache(key_prefix="sitemap_node", expire_secs=60*60*24)
    def _make(page, hash):
      link_cnt = page.file.outbound_links.count()
      backlink_cnt = roam.models.Link.objects.filter(dest_heading__in=page.file.heading_set.all()).count()
@ -310,8 +310,9 @@ This tries to calculate a consistent cache key cheaply and probably fails.

 #+begin_src python :tangle sitemap/views.py :mkdirp yes
 import hashlib
+from arcology.cache_decorator import cache

-@functools.lru_cache(maxsize=20)
+@cache(key_prefix="sitemap_resp", expire_secs=60*60*24)
 def _cached(cache_key, hashes):
    print(f"called w/ cache key {cache_key}")
    ret = dict(
--- a/sitemap/models.py
+++ b/sitemap/models.py
@ -1,7 +1,7 @@
 # [[file:../sitemap.org::*Making SigmaJS Nodes][Making SigmaJS Nodes:1]]
 import arcology.models
 import roam.models
-import functools
+from arcology.cache_decorator import cache

 import hashlib

@ -13,7 +13,7 @@ def make_loc_hash(page: arcology.models.Page, salt, max_q=700):
 class Node():
  @classmethod
  def make_page_dict(cls, page):
-    @functools.lru_cache(maxsize=5000)
+    @cache(key_prefix="sitemap_node", expire_secs=60*60*24)
    def _make(page, hash):
      link_cnt = page.file.outbound_links.count()
      backlink_cnt = roam.models.Link.objects.filter(dest_heading__in=page.file.heading_set.all()).count()
--- a/sitemap/views.py
+++ b/sitemap/views.py
@ -50,8 +50,9 @@ def tag_page(request, tag: str):

 # [[file:../sitemap.org::*JSON Handler/View][JSON Handler/View:1]]
 import hashlib
+from arcology.cache_decorator import cache

-@functools.lru_cache(maxsize=20)
+@cache(key_prefix="sitemap_resp", expire_secs=60*60*24)
 def _cached(cache_key, hashes):
    print(f"called w/ cache key {cache_key}")
    ret = dict(
Author	SHA1	Message	Date
Ryan Rix	22351ef1df	cache the sitemap.json response	2024-03-05 13:22:09 -08:00
Ryan Rix	2a04bc2286	add caching to org page views and feeds and sidebar	2024-03-05 13:21:44 -08:00
Ryan Rix	c9ea31e9fb	add a cache decorate that uses the django caching framework	2024-03-05 13:21:23 -08:00