Compare commits

...

3 Commits

Author SHA1 Message Date
Ryan Rix 22351ef1df cache the sitemap.json response 2024-03-05 13:22:09 -08:00
Ryan Rix 2a04bc2286 add caching to org page views and feeds and sidebar 2024-03-05 13:21:44 -08:00
Ryan Rix c9ea31e9fb add a cache decorate that uses the django caching framework 2024-03-05 13:21:23 -08:00
12 changed files with 165 additions and 38 deletions

View File

@ -20,9 +20,9 @@ from django.conf import settings
from django_prometheus.models import ExportModelOperationsMixin as EMOM
import arrow
import functools
import arroyo.arroyo_rs as native
from arcology.cache_decorator import cache
import roam.models
@ -210,7 +210,7 @@ class Page(EMOM('page'), models.Model):
def to_html(self, links, heading=None, include_subheadings=False):
return self._to_html_memoized(hashabledict(links), heading, include_subheadings, self.file.digest)
@functools.lru_cache(maxsize=500)
@cache(key_prefix="page_html", expire_secs=60*60*24*7)
def _to_html_memoized(self, links, heading, include_subheadings, _file_digest):
if heading is not None:
headings = [heading]
@ -410,7 +410,7 @@ class FeedEntry(EMOM('feed_entry'), models.Model):
def to_html(self, links):
return self._to_html_memoized(hashabledict(links), self.heading.path.digest)
@functools.lru_cache(maxsize=500)
@cache(key_prefix="feedentry_html", expire_secs=60*60*24*7)
def _to_html_memoized(self, links, _file_digest):
opts = native.ExportOptions(
link_retargets=links,
@ -679,10 +679,12 @@ If the site has any feeds, they're injected in to the =<head>= along with any pa
The main =content= block contains the =<main>= generated by the native parser, and a sidebar containing backlinks, and page metadata, and other crap.
#+begin_src jinja2 :tangle arcology/templates/arcology/page.html
{% load cache %}
{% block content %}
{# HTML is sent through without HTML Escaping via | safe #}
{{ html_content | safe }}
{% cache 604800 sidebar the_page.hash %}
<section class="sidebar">
{% if backlinks|length > 0 %}
<div class="backlinks">
@ -728,6 +730,7 @@ The main =content= block contains the =<main>= generated by the native parser, a
</div>
{% endif %}
</section>
{% endcache %}
{% endblock %}
#+end_src

View File

@ -0,0 +1,29 @@
# [[file:../scaffolding.org::*File-backed HTML/Atom cache][File-backed HTML/Atom cache:2]]
import pathlib
from django.core.cache import caches
import logging
logger = logging.getLogger(__name__)
def cache(key_prefix="", cache_connection="default", expire_secs=600):
    """Return a decorator that memoizes a callable in a Django cache backend.

    Args:
        key_prefix: Text placed in front of every generated cache key, so
            that separate decorated callables do not share entries.
        cache_connection: Alias looked up in Django's ``caches`` handler to
            pick the backend that stores the results.
        expire_secs: Timeout, in seconds, handed to the backend's ``set``.

    Returns:
        A decorator. The wrapped callable computes a key from its positional
        and keyword arguments, returns the stored value on a hit, and on a
        miss calls the original callable and stores its result.

    Raises:
        TypeError: At call time, if any argument of the wrapped callable is
            unhashable (the key is built with ``hash()``).
    """
    # Imported here so the block stands on its own regardless of the
    # module-level import list.
    import functools

    # Unique default for ``get`` so a miss is not confused with a stored None
    # on backends that return the stored value as-is.
    missing = object()

    def return_decoration(func):
        @functools.wraps(func)  # keep the wrapped callable's name and docstring
        def wrapper(*args, **kwargs):
            backend = caches[cache_connection]
            # Keyword arguments are folded in sorted by name, so the order in
            # which the caller spells them does not change the key.
            key = args + tuple(sorted(kwargs.items()))
            # NOTE(review): hash() of str/bytes is salted per interpreter
            # process unless PYTHONHASHSEED is pinned, so separate worker
            # processes may compute different keys for equal arguments.
            # Confirm the deployment pins it, or move to a stable digest.
            cache_key = f"{key_prefix}/{hash(key)}"
            ret = backend.get(cache_key, missing)
            if ret is missing:
                logger.debug("cache_miss")
                ret = func(*args, **kwargs)
                backend.set(cache_key, ret, expire_secs)
            else:
                logger.debug("cache_hit")
            return ret
        return wrapper
    return return_decoration
# File-backed HTML/Atom cache:2 ends here

View File

@ -6,9 +6,9 @@ from django.conf import settings
from django_prometheus.models import ExportModelOperationsMixin as EMOM
import arrow
import functools
import arroyo.arroyo_rs as native
from arcology.cache_decorator import cache
import roam.models
@ -139,7 +139,7 @@ class Page(EMOM('page'), models.Model):
def to_html(self, links, heading=None, include_subheadings=False):
return self._to_html_memoized(hashabledict(links), heading, include_subheadings, self.file.digest)
@functools.lru_cache(maxsize=500)
@cache(key_prefix="page_html", expire_secs=60*60*24*7)
def _to_html_memoized(self, links, heading, include_subheadings, _file_digest):
if heading is not None:
headings = [heading]
@ -255,7 +255,7 @@ class FeedEntry(EMOM('feed_entry'), models.Model):
def to_html(self, links):
return self._to_html_memoized(hashabledict(links), self.heading.path.digest)
@functools.lru_cache(maxsize=500)
@cache(key_prefix="feedentry_html", expire_secs=60*60*24*7)
def _to_html_memoized(self, links, _file_digest):
opts = native.ExportOptions(
link_retargets=links,

View File

@ -22,6 +22,16 @@ DEBUG = (ARCOLOGY_ENVIRONMENT != "production")
SYNCTHING_KEY = os.getenv("ARCOLOGY_SYNCTHING_KEY")
# Environment Variables:3 ends here
# [[file:../../configuration.org::*Environment Variables][Environment Variables:4]]
# Directory backing the shared cache; ARCOLOGY_CACHE_PATH overrides the
# /var/tmp default.
BASE_CACHE_PATH = os.getenv("ARCOLOGY_CACHE_PATH", "/var/tmp/django_cache")

# Single "default" cache: django-prometheus's instrumented file-based backend,
# rooted at BASE_CACHE_PATH.
CACHES = {
    "default": {
        "BACKEND": "django_prometheus.cache.backends.filebased.FileBasedCache",
        "LOCATION": BASE_CACHE_PATH,
    },
}
# Environment Variables:4 ends here
# [[file:../../configuration.org::*Hostname configuration from =arcology.model.Site=, eventually][Hostname configuration from =arcology.model.Site=, eventually:1]]
ALLOWED_HOSTS = "thelionsrear.com,rix.si,arcology.garden,whatthefuck.computer,cce.whatthefuck.computer,cce.rix.si,engine.arcology.garden,127.0.0.1,localhost,v2.thelionsrear.com,v2.arcology.garden,cce2.whatthefuck.computer,engine2.arcology.garden".split(',')
# Hostname configuration from =arcology.model.Site=, eventually:1 ends here
@ -71,6 +81,10 @@ LOGGING = {
"handlers": ["console"],
"level": os.getenv("DJANGO_LOG_LEVEL", "INFO"),
"propagate": False,
},
"arcology.cache_decorator": { # left as an example to change later.
"handlers": ["console"],
"level": "INFO",
}
},
"root": {
@ -94,13 +108,6 @@ MIDDLEWARE = [
"django_prometheus.middleware.PrometheusAfterMiddleware",
]
CACHES = {
'default': {
'BACKEND': 'django_prometheus.cache.backends.filebased.FileBasedCache',
'LOCATION': '/var/tmp/django_cache',
}
}
ROOT_URLCONF = "arcology.urls"
TEMPLATES = [

View File

@ -1,4 +1,4 @@
{# [[file:../../arcology.org::*Rendering the converted Org HTML in to a whole web-page][Rendering the converted Org HTML in to a whole web-page:5]] #}
{# [[file:../../arcology.org::*=arcology/page.html= extends =app.html= to embed the Org page and its metadata][=arcology/page.html= extends =app.html= to embed the Org page and its metadata:5]] #}
{% extends "arcology/app.html" %}
{% block title %}Page Not Found{% endblock %}
@ -26,4 +26,4 @@
</section>
{% endblock %}
{# Rendering the converted Org HTML in to a whole web-page:5 ends here #}
{# =arcology/page.html= extends =app.html= to embed the Org page and its metadata:5 ends here #}

View File

@ -1,12 +1,12 @@
{# [[file:../../../arcology.org::*Rendering the converted Org HTML in to a whole web-page][Rendering the converted Org HTML in to a whole web-page:1]] #}
{# [[file:../../../arcology.org::*=arcology/page.html= extends =app.html= to embed the Org page and its metadata][=arcology/page.html= extends =app.html= to embed the Org page and its metadata:1]] #}
{% extends "arcology/app.html" %}
{# Rendering the converted Org HTML in to a whole web-page:1 ends here #}
{# =arcology/page.html= extends =app.html= to embed the Org page and its metadata:1 ends here #}
{# [[file:../../../arcology.org::*Rendering the converted Org HTML in to a whole web-page][Rendering the converted Org HTML in to a whole web-page:2]] #}
{# [[file:../../../arcology.org::*=arcology/page.html= extends =app.html= to embed the Org page and its metadata][=arcology/page.html= extends =app.html= to embed the Org page and its metadata:2]] #}
{% block title %}{{ head_title }}{% endblock %}
{# Rendering the converted Org HTML in to a whole web-page:2 ends here #}
{# =arcology/page.html= extends =app.html= to embed the Org page and its metadata:2 ends here #}
{# [[file:../../../arcology.org::*Rendering the converted Org HTML in to a whole web-page][Rendering the converted Org HTML in to a whole web-page:3]] #}
{# [[file:../../../arcology.org::*=arcology/page.html= extends =app.html= to embed the Org page and its metadata][=arcology/page.html= extends =app.html= to embed the Org page and its metadata:3]] #}
{% block extra_head %}
{% for feed in feeds %}
<link rel="alternate" type="application/atom+xml" href="{{ feed.url }}" title="{{ feed.title }}" />
@ -17,13 +17,15 @@
<meta name="robots" content=""/>
{% endif %}
{% endblock %}
{# Rendering the converted Org HTML in to a whole web-page:3 ends here #}
{# =arcology/page.html= extends =app.html= to embed the Org page and its metadata:3 ends here #}
{# [[file:../../../arcology.org::*Rendering the converted Org HTML in to a whole web-page][Rendering the converted Org HTML in to a whole web-page:4]] #}
{# [[file:../../../arcology.org::*=arcology/page.html= extends =app.html= to embed the Org page and its metadata][=arcology/page.html= extends =app.html= to embed the Org page and its metadata:4]] #}
{% load cache %}
{% block content %}
{# HTML is sent through without HTML Escaping via | safe #}
{{ html_content | safe }}
{% cache 604800 sidebar the_page.hash %}
<section class="sidebar">
{% if backlinks|length > 0 %}
<div class="backlinks">
@ -69,5 +71,6 @@
</div>
{% endif %}
</section>
{% endcache %}
{% endblock %}
{# Rendering the converted Org HTML in to a whole web-page:4 ends here #}
{# =arcology/page.html= extends =app.html= to embed the Org page and its metadata:4 ends here #}

View File

@ -228,6 +228,18 @@ The =ARCOLOGY_SYNCTHING_KEY= environment variable will be set to the [[id:cce/sy
SYNCTHING_KEY = os.getenv("ARCOLOGY_SYNCTHING_KEY")
#+END_SRC
The =ARCOLOGY_CACHE_PATH= is set to a path that multi-process django can use to cache processed HTML and Atom between processes.
#+BEGIN_SRC python :tangle arcology/settings/__init__.py
# Directory backing the shared cache; ARCOLOGY_CACHE_PATH overrides the
# /var/tmp default.
BASE_CACHE_PATH = os.getenv("ARCOLOGY_CACHE_PATH", "/var/tmp/django_cache")

# Single "default" cache: django-prometheus's instrumented file-based backend,
# rooted at BASE_CACHE_PATH.
CACHES = {
    "default": {
        "BACKEND": "django_prometheus.cache.backends.filebased.FileBasedCache",
        "LOCATION": BASE_CACHE_PATH,
    },
}
#+END_SRC
** NEXT Hostname configuration from =arcology.model.Site=, eventually
When I have the sites organized in an org-mode table, i'll reapproach the hostname list, and probably before then when i want to test domain-based routing.
@ -293,6 +305,10 @@ LOGGING = {
"handlers": ["console"],
"level": os.getenv("DJANGO_LOG_LEVEL", "INFO"),
"propagate": False,
},
"arcology.cache_decorator": { # left as an example to change later.
"handlers": ["console"],
"level": "INFO",
}
},
"root": {
@ -455,13 +471,6 @@ MIDDLEWARE = [
"django_prometheus.middleware.PrometheusAfterMiddleware",
]
CACHES = {
'default': {
'BACKEND': 'django_prometheus.cache.backends.filebased.FileBasedCache',
'LOCATION': '/var/tmp/django_cache',
}
}
ROOT_URLCONF = "arcology.urls"
TEMPLATES = [

View File

@ -5,7 +5,11 @@ version = "0.0.1"
description = "org-mode metadata query engine, publishing platform, and computer metaprogrammer"
# license = "Hey Smell This"
readme = "README.md"
dependencies = ["click ~=8.1", "django ~= 4.2", "django-stub", "polling", "django-prometheus", "arroyo", "arrow ~= 1.3.0", "gunicorn ~= 21.0", "htmx ~= 1.17" ]
dependencies = [
    "django ~= 4.2", "django-stub", "django-prometheus",
    "click ~=8.1", "polling", "arrow ~= 1.3.0", "gunicorn ~= 21.0", "htmx ~= 1.17",
    "arroyo",
]
requires-python = ">=3.10"
authors = [
{ name = "Ryan Rix", email = "code@whatthefuck.computer" }

View File

@ -2,6 +2,7 @@
:ID: arcology/django/scaffolding
:END:
#+TITLE: Arcology Project Scaffolding
#+filetags: :Project:
#+ARCOLOGY_KEY: arcology/scaffolding
* Dev Environment
@ -17,7 +18,11 @@ version = "0.0.1"
description = "org-mode metadata query engine, publishing platform, and computer metaprogrammer"
# license = "Hey Smell This"
readme = "README.md"
dependencies = ["click ~=8.1", "django ~= 4.2", "django-stub", "polling", "django-prometheus", "arroyo", "arrow ~= 1.3.0", "gunicorn ~= 21.0", "htmx ~= 1.17" ]
dependencies = [
    "django ~= 4.2", "django-stub", "django-prometheus",
    "click ~=8.1", "polling", "arrow ~= 1.3.0", "gunicorn ~= 21.0", "htmx ~= 1.17",
    "arroyo",
]
requires-python = ">=3.10"
authors = [
{ name = "Ryan Rix", email = "code@whatthefuck.computer" }
@ -408,3 +413,68 @@ class AgentClassification(str, Enum):
" return cls." enum "\n")))
(apply #'concat))
#+end_src
** File-backed HTML/Atom cache
:PROPERTIES:
:ID: 20240305T122458.841243
:ROAM_ALIASES: "arcology.file_cache.str_file_cache decorator"
:END:
I got away with using =functools.lru_cache= with the FastAPI prototype because uvicorn was single-process, but now we're deploying a WSGI app on multi-process =gunicorn= so the memory that the =lru_cache= writes to is not shared between the processes[fn:1:Maybe some day the GIL won't get in the way, alas]. I don't feel like trying to get the Arcology to work as ASGI Django is worth the trouble, there would be too many multi-colored functions duplicated between the sync workers and the async workers.
There are currently four invocations of =lru_cache= in this code-base, and they're all caching big huge strings. It's easier perhaps to swap in a thing which writes those HTML strings to files. The call-sites all have the source-file's =sha256= sum so that those =lru_cache= functions have a cache-breaking key; this can still be a cache-breaking key, just on the filesystem instead. God-speed to whoever deploys the Arcology to a multi-system Kubernetes cluster.
So now you can do this:
#+begin_src python
from arcology.cache_decorator import cache
@cache(key_prefix="strs")
def gimme(hk):
    return "hello, world!"
gimme(1)
#+end_src
Writing a wrapper like this is sort of funny to look at.
Consider the =@cache()= invocation above.
- That calls the outer-most function =cache= below, which returns the un-evaluated function =return_decoration=
- The decorator system then invokes *that*, passing the =gimme= function in to it
- *that* returns a =wrapper= function which is the thing that is invoked when =gimme(1)= is invoked.
All this nesting is necessary to pass arguments in to the decorator, and to have access to the inner function's arguments to calculate the hash key.
#+begin_src python :tangle arcology/cache_decorator.py
import pathlib
from django.core.cache import caches
import logging
logger = logging.getLogger(__name__)
def cache(key_prefix="", cache_connection="default", expire_secs=600):
    """Return a decorator that memoizes a callable in a Django cache backend.

    Args:
        key_prefix: Text placed in front of every generated cache key, so
            that separate decorated callables do not share entries.
        cache_connection: Alias looked up in Django's ``caches`` handler to
            pick the backend that stores the results.
        expire_secs: Timeout, in seconds, handed to the backend's ``set``.

    Returns:
        A decorator. The wrapped callable computes a key from its positional
        and keyword arguments, returns the stored value on a hit, and on a
        miss calls the original callable and stores its result.

    Raises:
        TypeError: At call time, if any argument of the wrapped callable is
            unhashable (the key is built with ``hash()``).
    """
    # Imported here so the block stands on its own regardless of the
    # module-level import list.
    import functools

    # Unique default for ``get`` so a miss is not confused with a stored None
    # on backends that return the stored value as-is.
    missing = object()

    def return_decoration(func):
        @functools.wraps(func)  # keep the wrapped callable's name and docstring
        def wrapper(*args, **kwargs):
            backend = caches[cache_connection]
            # Keyword arguments are folded in sorted by name, so the order in
            # which the caller spells them does not change the key.
            key = args + tuple(sorted(kwargs.items()))
            # NOTE(review): hash() of str/bytes is salted per interpreter
            # process unless PYTHONHASHSEED is pinned, so separate worker
            # processes may compute different keys for equal arguments.
            # Confirm the deployment pins it, or move to a stable digest.
            cache_key = f"{key_prefix}/{hash(key)}"
            ret = backend.get(cache_key, missing)
            if ret is missing:
                logger.debug("cache_miss")
                ret = func(*args, **kwargs)
                backend.set(cache_key, ret, expire_secs)
            else:
                logger.debug("cache_hit")
            return ret
        return wrapper
    return return_decoration
#+end_src
*** NEXT I need to make sure to write some code to do cache-invalidation before it becomes a problem, too...
could also just use [[https://www.man7.org/linux/man-pages/man8/systemd-tmpfiles.8.html][=systemd-tmpfiles=]]..!

View File

@ -235,7 +235,7 @@ compose together and return in a single dict for JSON rendering shaped like:
#+begin_src python :tangle sitemap/models.py :mkdirp yes
import arcology.models
import roam.models
import functools
from arcology.cache_decorator import cache
import hashlib
@ -247,7 +247,7 @@ def make_loc_hash(page: arcology.models.Page, salt, max_q=700):
class Node():
@classmethod
def make_page_dict(cls, page):
@functools.lru_cache(maxsize=5000)
@cache(key_prefix="sitemap_node", expire_secs=60*60*24)
def _make(page, hash):
link_cnt = page.file.outbound_links.count()
backlink_cnt = roam.models.Link.objects.filter(dest_heading__in=page.file.heading_set.all()).count()
@ -310,8 +310,9 @@ This tries to calculate a consistent cache key cheaply and probably fails.
#+begin_src python :tangle sitemap/views.py :mkdirp yes
import hashlib
from arcology.cache_decorator import cache
@functools.lru_cache(maxsize=20)
@cache(key_prefix="sitemap_resp", expire_secs=60*60*24)
def _cached(cache_key, hashes):
print(f"called w/ cache key {cache_key}")
ret = dict(

View File

@ -1,7 +1,7 @@
# [[file:../sitemap.org::*Making SigmaJS Nodes][Making SigmaJS Nodes:1]]
import arcology.models
import roam.models
import functools
from arcology.cache_decorator import cache
import hashlib
@ -13,7 +13,7 @@ def make_loc_hash(page: arcology.models.Page, salt, max_q=700):
class Node():
@classmethod
def make_page_dict(cls, page):
@functools.lru_cache(maxsize=5000)
@cache(key_prefix="sitemap_node", expire_secs=60*60*24)
def _make(page, hash):
link_cnt = page.file.outbound_links.count()
backlink_cnt = roam.models.Link.objects.filter(dest_heading__in=page.file.heading_set.all()).count()

View File

@ -50,8 +50,9 @@ def tag_page(request, tag: str):
# [[file:../sitemap.org::*JSON Handler/View][JSON Handler/View:1]]
import hashlib
from arcology.cache_decorator import cache
@functools.lru_cache(maxsize=20)
@cache(key_prefix="sitemap_resp", expire_secs=60*60*24)
def _cached(cache_key, hashes):
print(f"called w/ cache key {cache_key}")
ret = dict(