arcology/scaffolding.org

485 lines
15 KiB
Org Mode

:PROPERTIES:
:ID: arcology/django/scaffolding
:END:
#+TITLE: Arcology Project Scaffolding
#+filetags: :Project:
#+ARCOLOGY_KEY: arcology/scaffolding
* Dev Environment
** Python Project
The =pyproject.toml= file is slowly starting to consume all of the different configuration files a [[id:cce/python][Python]] project needs. That's nice.
#+begin_src python :tangle pyproject.toml
# Packaging metadata for the Arcology Django project, read by setuptools at build time.
[project]
name = "arcology"
version = "0.0.1"
description = "org-mode metadata query engine, publishing platform, and computer metaprogrammer"
# license = "Hey Smell This"
readme = "README.md"
# Distribution names mirror the packages the Nix expressions install
# (django-stubs, django-htmx) so that a plain `pip install .` resolves the same set.
dependencies = [
  "django ~= 4.2", "django-stubs", "django-prometheus",
  "click ~=8.1", "polling", "arrow ~= 1.3.0", "gunicorn ~= 21.0", "django-htmx ~= 1.17",
  "arroyo"
]
requires-python = ">=3.10"
authors = [
  { name = "Ryan Rix", email = "code@whatthefuck.computer" }
]

# Installs an `arcology` executable that dispatches to Django's management commands.
[project.scripts]
"arcology" = "arcology:django_manage"

[tool.setuptools]
package-dir = {"" = "."}

# Non-Python files that must ship inside the built distribution.
[tool.setuptools.package-data]
arcology = [
  'settings/sites.json',
  'static/arcology/js/*',
  'static/arcology/css/*',
  'static/arcology/fonts/*',
  'templates/arcology/*',
  'templates/*',
]
sitemap = [
  'static/sitemap/js/*',
  'static/sitemap/css/*',
  'templates/sitemap/*',
]

[tool.setuptools.packages.find]
where = ["."]

[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
#+end_src
** Nix package for the service
=nix build= will spit out a python project that can be used in a NixOS definition. now where would we get one of those...? It's marked with =licenses.unfree= right now because I don't think [[id:20220116T143655.499306][Hey Smell This]] will pass the OSI sniff-test.
#+begin_src nix :tangle default.nix
# Builds the Arcology Django service as a Python package.
# `python3` and `arroyo_rs` have no defaults here, so the caller must supply
# them (flake.nix does so through callPackage).
{
  pkgs ? import <nixpkgs> {},
  lib ? pkgs.lib,
  python3,
  arroyo_rs,
}:

let
  pyPkgs = python3.pkgs;

  # Rebuild a typing-stubs package against Django 4.
  withDjango4 = stubs: stubs.override { django = pyPkgs.django_4; };

  # Python libraries that must be importable at runtime.
  pythonDeps = [
    pyPkgs.arrow
    pyPkgs.click
    pyPkgs.django_4
    pyPkgs.django-prometheus
    pyPkgs.django-htmx
    (withDjango4 pyPkgs.django-stubs-ext)
    (withDjango4 pyPkgs.django-stubs)
    pyPkgs.gunicorn
    pyPkgs.polling
    pyPkgs.setuptools
  ];
in
pyPkgs.buildPythonPackage {
  pname = "arcology";
  version = "0.0.1";
  format = "pyproject";

  src = ./.;

  nativeBuildInputs = [ ];
  propagatedBuildInputs = [ arroyo_rs ] ++ pythonDeps;

  # Expose the matching gunicorn so consumers can reach it from this derivation.
  passthru.gunicorn = pyPkgs.gunicorn;

  meta = {
    description = "An org-mode site engine";
    homepage = "https://engine.arcology.garden/";
    license = lib.licenses.unfree;
    maintainers = [ lib.maintainers.rrix ];
  };
}
#+end_src
** Dev Environment
=nix develop= or =nix-shell= will set you up with an environment that has Python programming dependencies available.
#+begin_src nix :tangle shell.nix
# Development shell: a Python interpreter carrying the project's dependencies
# plus linters, type checkers and the Rust tooling listed below.
{ pkgs ? import <nixpkgs> {},
  python3 ? pkgs.python3,
  # Fallback for plain `nix-shell`; flake.nix passes arroyo_rs explicitly.
  # NOTE(review): this default is an absolute path on the author's machine.
  arroyo_rs ? pkgs.callPackage /home/rrix/org/arroyo/default.nix {},
}:
let
  myPython = python3.withPackages (pp: with pp; [
    pip
    pytest
    mypy
    arrow
    arroyo_rs
    # click is required by pyproject.toml and default.nix; keep the shell in sync.
    click
    django_4
    django-prometheus
    django-htmx
    (django-stubs-ext.override { django = django_4; })
    (django-stubs.override { django = django_4; })
    gunicorn
    polling
  ]);
in pkgs.mkShell {
  packages = (with pkgs; [
    maturin
    myPython
    pyright
    black
  ]);
  RUST_SRC_PATH = "${pkgs.rust.packages.stable.rustPlatform.rustLibSrc}";
  NIX_CONFIG = "builders =";
  # Export so that child processes started from the shell inherit the path;
  # a bare assignment only sets a shell-local variable when PYTHONPATH is not
  # already part of the environment.
  shellHook = ''
    export PYTHONPATH=${myPython}/${myPython.sitePackages}
  '';
}
#+end_src
** A Flake to tie everything together and make it possible to run remotely
Nix is really going this direction, I'm not sure it's worthwhile but I'm going to see how to adapt to this world. It should be possible to =nix run= a few apps to be able to operate the arcology.
#+begin_src nix :tangle flake.nix
{
  description = "Arcology Site Engine, Django Edition";

  inputs = {
    # Reuse whichever nixpkgs the arroyo flake is pinned to.
    nixpkgs.follows = "arroyo_rs/nixpkgs";
    flake-utils.url = "github:numtide/flake-utils";
    arroyo_rs.url = "git+https://code.rix.si/rrix/arroyo";
  };

  outputs = { self, nixpkgs, flake-utils, arroyo_rs }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = import nixpkgs {
          inherit system;
          # default.nix marks the package with an unfree license.
          config.allowUnfree = true;
        };
        inherit (pkgs) python3;
        inherit (flake-utils.lib) mkApp;

        arroyo = arroyo_rs.packages.${system}.default;

        # Arguments handed to both the package and the dev-shell expressions.
        sharedArgs = {
          inherit python3;
          arroyo_rs = arroyo;
        };

        ownPackages = self.packages.${system};
      in
      {
        devShells.default = pkgs.callPackage ./shell.nix sharedArgs;

        packages = rec {
          arcology = pkgs.callPackage ./default.nix sharedArgs;
          inherit arroyo;
          default = arcology;
        };

        apps = rec {
          arcology = mkApp {
            drv = ownPackages.arcology;
            exePath = "/bin/arcology";
          };
          # he he he
          arroyo = mkApp {
            drv = ownPackages.arroyo;
            exePath = "/bin/arroyo";
          };
          default = arcology;
        };
      }
    );
}
#+end_src
*** NEXT expose nixos modules and home manager modules here to aid re-bootstrap
** Direnv
[[id:45fc2a02-fcd0-40c6-a29e-897c0ee7b1c7][direnv]] fucking rules.
#+begin_src python :tangle .envrc
use flake
#+end_src
** Gitignore
#+begin_src python :tangle .gitignore
arcology.egg-info
__pycache__
venv
result
.direnv
db.sqlite3
#+end_src
* Django bootstraps
#+begin_src python :tangle arcology/__init__.py
import os
import sys
def django_manage():
    """Console-script entry point: run a Django management command.

    Selects ``arcology.settings`` as the settings module unless
    ``DJANGO_SETTINGS_MODULE`` is already present in the environment, then
    passes ``sys.argv`` to Django's command-line dispatcher.
    """
    if "DJANGO_SETTINGS_MODULE" not in os.environ:
        os.environ["DJANGO_SETTINGS_MODULE"] = "arcology.settings"

    # Imported here rather than at module level, after the settings module has been chosen.
    from django.core import management

    management.execute_from_command_line(sys.argv)
#+end_src
this and a bit in =pyproject.toml= lets you just type =arcology watchfiles= to invoke a manage.py command.
These are generated scaffolds for now, basically the manage.py and -m arcology are the same and that is annoying, but i'll fix it some day.
#+begin_src python :tangle manage.py
#!/usr/bin/env python3
"""Django's command-line utility for administrative tasks."""
import os
import sys


def main():
    """Run administrative tasks.

    Defaults ``DJANGO_SETTINGS_MODULE`` to ``arcology.settings`` and hands
    ``sys.argv`` to Django's command-line dispatcher.

    Raises:
        ImportError: if Django cannot be imported; the original error is
            chained as the cause.
    """
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'arcology.settings')
    try:
        from django.core.management import execute_from_command_line
    except ImportError as exc:
        raise ImportError(
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        ) from exc
    execute_from_command_line(sys.argv)


if __name__ == '__main__':
    main()
#+end_src
#+begin_src python :tangle arcology/asgi.py
"""
ASGI config for arcology project.

It exposes the ASGI callable as a module-level variable named ``application``.

For more information on this file, see
https://docs.djangoproject.com/en/4.2/howto/deployment/asgi/
"""
import os
from django.core.asgi import get_asgi_application
# Use the project settings unless the environment already names a settings module.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "arcology.settings")
# Module-level callable that an ASGI server imports to serve the site.
application = get_asgi_application()
#+end_src
#+begin_src python :tangle arcology/wsgi.py
"""
WSGI config for arcology project.

It exposes the WSGI callable as a module-level variable named ``application``.

For more information on this file, see
https://docs.djangoproject.com/en/4.2/howto/deployment/wsgi/
"""
import os
from django.core.wsgi import get_wsgi_application
# Use the project settings unless the environment already names a settings module.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "arcology.settings")
# Module-level callable that a WSGI server imports to serve the site.
application = get_wsgi_application()
#+end_src
* Middlewares
** User-Agent break-down
:PROPERTIES:
:ID: 20240213T120603.921365
:ROAM_ALIASES: arcology.agent_utils.AgentClassification
:END:
This =AgentClassification= enumeration class can take a User Agent header and map it to one of a handful of groups, which a user has the ability to extend. =AgentClassification.from_request(request)= will return a string from an enumeration, this is probably useful in labeling metrics or site statistics.
#+NAME: agent_classifications
| User Agent Substring | Enumeration |
|-------------------------------+-------------|
| prometheus | INTERNAL |
| feediverse | INTERNAL |
| Synapse | MATRIX |
| Element | MATRIX |
| SubwayTooter | APP |
| Dalvik | APP |
| Nextcloud-android | APP |
| Pleroma | FEDIVERSE |
| Mastodon/ | FEDIVERSE |
| Akkoma | FEDIVERSE |
| Friendica | FEDIVERSE |
| FoundKey | FEDIVERSE |
| MissKey | FEDIVERSE |
| CalcKey | FEDIVERSE |
| gotosocial | FEDIVERSE |
| Epicyon | FEDIVERSE |
| feedparser | FEED |
| granary | FEED |
| Tiny Tiny RSS | FEED |
| Go_NEB | FEED |
| Gwene | FEED |
| Feedbin | FEED |
| NetNewsWire | FEED |
| FreshRSS | FEED |
| SimplePie | FEED |
| Elfeed | FEED |
| inoreader | FEED |
| Reeder | FEED |
| Miniflux | FEED |
| Bot | BOT |
| bot | BOT |
| Poduptime | BOT |
| aiohttp | AUTOMATION |
| python-requests | AUTOMATION |
| Go-http-client | AUTOMATION |
| curl/ | AUTOMATION |
| wget/ | AUTOMATION |
| keybase-proofs/ | AUTOMATION |
| InternetMeasurement | CRAWLER |
| CensysInspect | CRAWLER |
| scaninfo@paloaltonetworks.com | CRAWLER |
| SEOlyt/ | CRAWLER |
| Sogou web spider/ | CRAWLER |
| Chrome/ | BROWSER |
| Firefox/ | BROWSER |
| DuckDuckGo/ | BROWSER |
| Safari/ | BROWSER |
| Opera/ | BROWSER |
| ddg_android/ | BROWSER |
#+begin_src python :tangle arcology/agent_utils.py :noweb yes
from __future__ import annotations
import logging
from typing import List
from enum import Enum
logger = logging.getLogger(__name__)
class AgentClassification(str, Enum):
NO_UA = "no-ua"
UNKNOWN = "unknown"
<<make_enum()>>
def __str__(self):
return self.value
@classmethod
def from_request(cls, request) -> AgentClassification:
user_agent = request.headers.get("User-Agent")
if user_agent == "":
return cls.NO_UA
if user_agent is None:
return cls.NO_UA
<<agent_classifier()>>
logger.warn(f"Unknown User-Agent: {user_agent}")
return cls.UNKNOWN
#+end_src
#+name: make_enum
#+begin_src emacs-lisp :var tbl=agent_classifications
;; Emit one Python enum member line (NAME = "name") per distinct value found
;; in the second column of the table.
(let* ((names (mapcar (pcase-lambda (`(,_substring ,enum)) enum) tbl))
       (distinct (-uniq names)))
  (mapconcat (lambda (name) (format "%s = \"%s\"\n" name (downcase name)))
             distinct
             ""))
#+end_src
#+name: agent_classifier
#+begin_src emacs-lisp :var tbl=agent_classifications
;; Emit one Python substring test per table row, in table order, each
;; returning the enum member named in the row's second column.
(mapconcat
 (pcase-lambda (`(,needle ,enum))
   (concat "if '" needle "' in user_agent:" "\n"
           " return cls." enum "\n"))
 tbl
 "")
#+end_src
** File-backed HTML/Atom cache
:PROPERTIES:
:ID: 20240305T122458.841243
:ROAM_ALIASES: "arcology.file_cache.str_file_cache decorator"
:END:
I got away with using =functools.lru_cache= with the FastAPI prototype because uvicorn was single-process, but now we're deploying a WSGI app on multi-process =gunicorn= so the memory that the =lru_cache= writes to is not shared between the processes[fn:1:Maybe some day the GIL won't get in the way, alas]. I don't feel like trying to get the Arcology to work as ASGI Django is worth the trouble, there would be too many multi-colored functions duplicated between the sync workers and the async workers.
There are currently a handful of hot cache points in the code-base, and they're all caching big huge strings. Django's [[https://docs.djangoproject.com/en/5.0/topics/cache/][cache framework]] solves all of this handily, but it doesn't provide a memoizing decorator. It's easy enough to write our own, let's see:
I want to do this:
#+begin_src python
from arcology.cache_decorator import cache
# Memoize gimme() through the Django cache; its cache keys start with "local_test/".
@cache(key_prefix="local_test")
def gimme(hk):
    return "hello, world!"
# The first call runs gimme and stores the string; a repeat before the entry expires is served from the cache.
gimme(1)
#+end_src
Writing a wrapper like this is sort of funny to look at, so let's step through it.
Consider the ~@cache(key_prefix="local_test")~ invocation above.
- That calls the outer-most function =cache= below, which returns the un-evaluated function =return_decoration= with some configuration variables in-scope.
- The decorator system then invokes *that* function, passing the =gimme= function in to it
- *that* returns a =wrapper= function when evaluated which is the thing that is actually invoked when =gimme(1)= is invoked.
- The inner wrapper function calculates a cache key similarly to =functools.lru_cache= and checks the Django cache for a value stored under that key; on a miss it calls the original =gimme= function, stores the result, and returns it.
If it makes more sense, it may be helpful to think that the =@= in the code is evaluating the function returned by the statement after. If the statement is a naked function, it'll just evaluate it, but if you say =@cache()= it will decorate =gimme= with the return value of =cache()=, which is another wrapper function.
All this nesting is necessary to pass arguments in to the decorator, and to have access to the inner function's arguments to calculate the hash key.
#+begin_src python :tangle arcology/cache_decorator.py
import functools
import logging
import pathlib

from django.core.cache import caches
logger = logging.getLogger(__name__)
def cache(key_prefix="", cache_connection="default", expire_secs=600):
    """Build a decorator that memoizes a function's result in a Django cache.

    Args:
        key_prefix: text placed before the argument hash in every cache key;
            give each decorated function its own prefix to keep keys apart.
        cache_connection: alias of the cache to use, looked up in
            ``django.core.cache.caches``.
        expire_secs: timeout, in seconds, passed to ``set`` for stored results.

    Returns:
        A decorator. The decorated function must be called with hashable
        arguments. A result of ``None`` is indistinguishable from a cache
        miss, so such results are recomputed on every call.
    """
    def return_decoration(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            backend = caches[cache_connection]
            # Keyword arguments join the key as (name, value) pairs, sorted by
            # name so that call-site ordering does not yield distinct keys.
            key = args + tuple(sorted(kwargs.items()))
            # NOTE(review): hash() of str/bytes is salted per interpreter unless
            # PYTHONHASHSEED is fixed; confirm every worker sharing this cache
            # also shares a hash seed, otherwise their keys will not line up.
            cache_key = f"{key_prefix}/{hash(key)}"
            ret = backend.get(cache_key)
            if ret is not None:
                logger.debug("cache_hit %s", cache_key)
                return ret
            logger.debug("cache_miss %s", cache_key)
            ret = func(*args, **kwargs)
            backend.set(cache_key, ret, expire_secs)
            return ret
        return wrapper
    return return_decoration
#+end_src
*** NEXT I need to make sure to write some code to do cache-invalidation before it becomes a problem, too...
could also just use [[https://www.man7.org/linux/man-pages/man8/systemd-tmpfiles.8.html][=systemd-tmpfiles=]]..!