485 lines
15 KiB
Org Mode
485 lines
15 KiB
Org Mode
:PROPERTIES:
|
|
:ID: arcology/django/scaffolding
|
|
:END:
|
|
#+TITLE: Arcology Project Scaffolding
|
|
#+filetags: :Project:
|
|
#+ARCOLOGY_KEY: arcology/scaffolding
|
|
|
|
* Dev Environment
|
|
|
|
** Python Project
|
|
|
|
The =pyproject.toml= file is slowly starting to consume all of the different configuration files a [[id:cce/python][Python]] project needs. That's nice.
|
|
|
|
#+begin_src python :tangle pyproject.toml
|
|
[project]
|
|
name = "arcology"
|
|
version = "0.0.1"
|
|
description = "org-mode metadata query engine, publishing platform, and computer metaprogrammer"
|
|
# license = "Hey Smell This"
|
|
readme = "README.md"
|
|
# Runtime requirements. The distribution names must match what is published
# on PyPI; they mirror the packages the Nix derivation installs
# (django-stubs, django-htmx).
dependencies = [
    "django ~= 4.2", "django-stubs", "django-prometheus",
    "click ~=8.1", "polling", "arrow ~= 1.3.0", "gunicorn ~= 21.0", "django-htmx ~= 1.17",
    "arroyo"
]
|
|
requires-python = ">=3.10"
|
|
authors = [
|
|
{ name = "Ryan Rix", email = "code@whatthefuck.computer" }
|
|
]
|
|
|
|
[project.scripts]
|
|
"arcology" = "arcology:django_manage"
|
|
|
|
[tool.setuptools]
|
|
package-dir = {"" = "."}
|
|
|
|
[tool.setuptools.package-data]
|
|
arcology = [
|
|
'settings/sites.json',
|
|
'static/arcology/js/*',
|
|
'static/arcology/css/*',
|
|
'static/arcology/fonts/*',
|
|
'templates/arcology/*',
|
|
'templates/*',
|
|
]
|
|
sitemap = [
|
|
'static/sitemap/js/*',
|
|
'static/sitemap/css/*',
|
|
'templates/sitemap/*',
|
|
]
|
|
|
|
[tool.setuptools.packages.find]
|
|
where = ["."]
|
|
|
|
[build-system]
|
|
requires = ["setuptools>=61.0"]
|
|
build-backend = "setuptools.build_meta"
|
|
#+end_src
|
|
|
|
** Nix package for the service
|
|
=nix build= will spit out a python project that can be used in a NixOS definition. now where would we get one of those...? It's marked with =licenses.unfree= right now because I don't think [[id:20220116T143655.499306][Hey Smell This]] will pass the OSI sniff-test.
|
|
|
|
#+begin_src nix :tangle default.nix
|
|
# Builds the Arcology Django project as a Python package.
#
# Arguments:
#   pkgs      - package set; falls back to the <nixpkgs> channel.
#   lib       - nixpkgs library; falls back to pkgs.lib.
#   python3   - Python whose package set is used for the build and for the
#               Python dependencies (no default: the caller must pass it).
#   arroyo_rs - the arroyo package (no default: the caller must pass it).
{
  pkgs ? import <nixpkgs> {},
  lib ? pkgs.lib,
  python3,

  arroyo_rs,
}:

python3.pkgs.buildPythonPackage rec {
  pname = "arcology";
  version = "0.0.1";
  # Build from the pyproject.toml found in src.
  format = "pyproject";

  src = ./.;

  nativeBuildInputs = with pkgs; [];

  # arroyo_rs comes from the function arguments; everything in the second
  # list is taken from python3's own package set.
  propagatedBuildInputs = (with pkgs; [
    arroyo_rs
  ]) ++ (with python3.pkgs; [
    arrow
    click
    django_4
    django-prometheus
    django-htmx
    # Both stubs packages are overridden to build against django_4.
    (django-stubs-ext.override { django = django_4; })
    (django-stubs.override { django = django_4; })
    gunicorn
    polling
    setuptools
  ]);

  # Re-export the gunicorn from the same Python package set as an attribute
  # of this derivation.
  passthru.gunicorn = python3.pkgs.gunicorn;

  meta = with lib; {
    description = "An org-mode site engine";
    homepage = "https://engine.arcology.garden/";
    # Marked unfree: the project's intended license is not expected to pass
    # as an OSI-approved one.
    license = licenses.unfree;
    maintainers = with maintainers; [ rrix ];
  };
}
|
|
#+end_src
|
|
|
|
** Dev Environment
|
|
=nix develop= or =nix-shell= will set you up with an environment that has Python programming dependencies available.
|
|
|
|
#+begin_src nix :tangle shell.nix
|
|
# Development shell: a Python environment with the project's dependencies
# plus test/type-check tooling, and a few extra command-line tools.
#
# Arguments:
#   pkgs      - package set; falls back to the <nixpkgs> channel.
#   python3   - Python to build the environment from; defaults to pkgs.python3.
#   arroyo_rs - the arroyo package; defaults to building a local checkout.
{ pkgs ? import <nixpkgs> {},
  python3 ? pkgs.python3,

  arroyo_rs ? pkgs.callPackage /home/rrix/org/arroyo/default.nix {},
}:
let
  myPython = python3.withPackages( pp: with pp; [
    # Tooling
    pip
    pytest
    mypy

    # Project dependencies
    arrow
    arroyo_rs
    django_4
    django-prometheus
    django-htmx
    (django-stubs-ext.override { django = django_4; })
    (django-stubs.override { django = django_4; })
    gunicorn
    polling
  ]);
in pkgs.mkShell {
  packages = (with pkgs; [
    maturin
    myPython

    pyright
    black]);
  RUST_SRC_PATH = "${pkgs.rust.packages.stable.rustPlatform.rustLibSrc}";
  # NOTE(review): an empty `builders` setting presumably keeps builds off any
  # configured remote builders — confirm.
  NIX_CONFIG = "builders =";
  # Export the variable so that processes started from the shell (python,
  # pyright, mypy, ...) inherit it; a bare assignment only creates a shell
  # variable unless PYTHONPATH already happens to be exported.
  shellHook = ''
    export PYTHONPATH=${myPython}/${myPython.sitePackages}
  '';
}
|
|
#+end_src
|
|
|
|
** A Flake to tie everything together and make it possible to run remotely
|
|
|
|
Nix is really going this direction, I'm not sure it's worthwhile but I'm going to see how to adapt to this world. It should be possible to =nix run= a few apps to be able to operate the arcology.
|
|
|
|
#+begin_src nix :tangle flake.nix
|
|
# Flake exposing the Arcology package, a development shell, and runnable apps
# for every default system.
{
  description = "Arcology Site Engine, Django Edition";

  # Use the same nixpkgs revision that the arroyo_rs input uses.
  inputs.nixpkgs.follows = "arroyo_rs/nixpkgs";
  inputs.flake-utils.url = "github:numtide/flake-utils";
  inputs.arroyo_rs.url = "git+https://code.rix.si/rrix/arroyo";

  outputs = { self, nixpkgs, flake-utils, arroyo_rs }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = import nixpkgs {
          inherit system;
          # default.nix sets meta.license = licenses.unfree, so unfree
          # packages have to be allowed for it to evaluate.
          config.allowUnfree = true;
        };

        python3 = pkgs.python3;
        # The arroyo package for this system, taken from the arroyo_rs input.
        arroyo = arroyo_rs.packages.${system}.default;
      in
      {
        # shell.nix and default.nix both receive the same python3 and arroyo.
        devShells.default = pkgs.callPackage ./shell.nix {
          inherit python3;
          arroyo_rs = arroyo;
        };

        packages = rec {
          arcology = pkgs.callPackage ./default.nix {
            inherit python3;
            arroyo_rs = arroyo;
          };
          # Re-export arroyo so it can be built and run from this flake too.
          inherit arroyo;
          default = arcology;
        };

        apps = rec {
          # Runs the `arcology` console script installed by the package.
          arcology = flake-utils.lib.mkApp {
            drv = self.packages.${system}.arcology;
            exePath = "/bin/arcology";
          };
          # he he he
          arroyo = flake-utils.lib.mkApp {
            drv = self.packages.${system}.arroyo;
            exePath = "/bin/arroyo";
          };
          default = arcology;
        };
      }
    );
}
|
|
#+end_src
|
|
|
|
*** NEXT expose nixos modules and home manager modules here to aid re-bootstrap
|
|
** Direnv
|
|
|
|
[[id:45fc2a02-fcd0-40c6-a29e-897c0ee7b1c7][direnv]] fucking rules.
|
|
|
|
#+begin_src python :tangle .envrc
|
|
use flake
|
|
#+end_src
|
|
|
|
** Gitignore
|
|
|
|
#+begin_src python :tangle .gitignore
|
|
arcology.egg-info
|
|
__pycache__
|
|
|
|
venv
|
|
result
|
|
.direnv
|
|
|
|
db.sqlite3
|
|
#+end_src
|
|
|
|
* Django bootstraps
|
|
|
|
#+begin_src python :tangle arcology/__init__.py
|
|
import os
|
|
import sys
|
|
|
|
def django_manage():
    """Console-script entry point: run Django's management CLI on ``sys.argv``.

    ``DJANGO_SETTINGS_MODULE`` is defaulted to ``arcology.settings``; a value
    already present in the environment is left untouched.
    """
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "arcology.settings")

    # Django is imported at call time, after the settings default is in place.
    from django.core import management

    management.execute_from_command_line(sys.argv)
|
|
#+end_src
|
|
|
|
this and a bit in =pyproject.toml= lets you just type =arcology watchfiles= to invoke a manage.py command.
|
|
|
|
These are generated scaffolds for now, basically the manage.py and -m arcology are the same and that is annoying, but i'll fix it some day.
|
|
|
|
#+begin_src python :tangle manage.py
|
|
#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys


def main():
    """Run administrative tasks.

    Defaults ``DJANGO_SETTINGS_MODULE`` to ``arcology.settings`` (an existing
    value in the environment wins) and passes ``sys.argv`` to Django's
    command-line dispatcher.

    Raises:
        ImportError: if Django cannot be imported; the original import error
            is chained as the cause.
    """
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'arcology.settings')
    try:
        from django.core.management import execute_from_command_line
    except ImportError as exc:
        # Re-raise with a hint about the most common causes.
        raise ImportError(
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        ) from exc
    execute_from_command_line(sys.argv)


if __name__ == '__main__':
    main()
|
|
#+end_src
|
|
|
|
#+begin_src python :tangle arcology/asgi.py
|
|
"""
|
|
ASGI config for arcology project.
|
|
|
|
It exposes the ASGI callable as a module-level variable named ``application``.
|
|
|
|
For more information on this file, see
|
|
https://docs.djangoproject.com/en/4.2/howto/deployment/asgi/
|
|
"""
|
|
|
|
import os
|
|
|
|
from django.core.asgi import get_asgi_application
|
|
|
|
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "arcology.settings")
|
|
|
|
application = get_asgi_application()
|
|
#+end_src
|
|
|
|
#+begin_src python :tangle arcology/wsgi.py
|
|
"""
|
|
WSGI config for arcology project.
|
|
|
|
It exposes the WSGI callable as a module-level variable named ``application``.
|
|
|
|
For more information on this file, see
|
|
https://docs.djangoproject.com/en/4.2/howto/deployment/wsgi/
|
|
"""
|
|
|
|
import os
|
|
|
|
from django.core.wsgi import get_wsgi_application
|
|
|
|
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "arcology.settings")
|
|
|
|
application = get_wsgi_application()
|
|
#+end_src
|
|
|
|
* Middlewares
|
|
** User-Agent break-down
|
|
:PROPERTIES:
|
|
:ID: 20240213T120603.921365
|
|
:ROAM_ALIASES: arcology.agent_utils.AgentClassification
|
|
:END:
|
|
|
|
This =AgentClassification= enumeration class can take a User Agent header and map it to one of a handful of groups, which a user has the ability to extend. =AgentClassification.from_request(request)= will return a string from an enumeration, this is probably useful in labeling metrics or site statistics.
|
|
|
|
|
|
#+NAME: agent_classifications
|
|
| User Agent Substring | Enumeration |
|
|
|-------------------------------+-------------|
|
|
| prometheus | INTERNAL |
|
|
| feediverse | INTERNAL |
|
|
| Synapse | MATRIX |
|
|
| Element | MATRIX |
|
|
| SubwayTooter | APP |
|
|
| Dalvik | APP |
|
|
| Nextcloud-android | APP |
|
|
| Pleroma | FEDIVERSE |
|
|
| Mastodon/ | FEDIVERSE |
|
|
| Akkoma | FEDIVERSE |
|
|
| Friendica | FEDIVERSE |
|
|
| FoundKey | FEDIVERSE |
|
|
| MissKey | FEDIVERSE |
|
|
| CalcKey | FEDIVERSE |
|
|
| gotosocial | FEDIVERSE |
|
|
| Epicyon | FEDIVERSE |
|
|
| feedparser | FEED |
|
|
| granary | FEED |
|
|
| Tiny Tiny RSS | FEED |
|
|
| Go_NEB | FEED |
|
|
| Gwene | FEED |
|
|
| Feedbin | FEED |
|
|
| NetNewsWire | FEED |
|
|
| FreshRSS | FEED |
|
|
| SimplePie | FEED |
|
|
| Elfeed | FEED |
|
|
| inoreader | FEED |
|
|
| Reeder | FEED |
|
|
| Miniflux | FEED |
|
|
| Bot | BOT |
|
|
| bot | BOT |
|
|
| Poduptime | BOT |
|
|
| aiohttp | AUTOMATION |
|
|
| python-requests | AUTOMATION |
|
|
| Go-http-client | AUTOMATION |
|
|
| curl/ | AUTOMATION |
|
|
| wget/ | AUTOMATION |
|
|
| keybase-proofs/ | AUTOMATION |
|
|
| InternetMeasurement | CRAWLER |
|
|
| CensysInspect | CRAWLER |
|
|
| scaninfo@paloaltonetworks.com | CRAWLER |
|
|
| SEOlyt/ | CRAWLER |
|
|
| Sogou web spider/ | CRAWLER |
|
|
| Chrome/ | BROWSER |
|
|
| Firefox/ | BROWSER |
|
|
| DuckDuckGo/ | BROWSER |
|
|
| Safari/ | BROWSER |
|
|
| Opera/ | BROWSER |
|
|
| ddg_android/ | BROWSER |
|
|
|
|
#+begin_src python :tangle arcology/agent_utils.py :noweb yes
|
|
from __future__ import annotations
|
|
import logging
|
|
from typing import List
|
|
from enum import Enum
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class AgentClassification(str, Enum):
|
|
NO_UA = "no-ua"
|
|
UNKNOWN = "unknown"
|
|
<<make_enum()>>
|
|
|
|
def __str__(self):
|
|
return self.value
|
|
|
|
@classmethod
|
|
def from_request(cls, request) -> AgentClassification:
|
|
user_agent = request.headers.get("User-Agent")
|
|
if user_agent == "":
|
|
return cls.NO_UA
|
|
if user_agent is None:
|
|
return cls.NO_UA
|
|
<<agent_classifier()>>
|
|
|
|
logger.warn(f"Unknown User-Agent: {user_agent}")
|
|
|
|
return cls.UNKNOWN
|
|
#+end_src
|
|
|
|
#+name: make_enum
|
|
#+begin_src emacs-lisp :var tbl=agent_classifications
|
|
;; Emit one Python enum member line (NAME = "name") for every distinct value
;; in the second column of TBL, in order of first appearance.
(let* ((labels (mapcar (pcase-lambda (`(,_needle ,label)) label) tbl))
       (distinct (delete-dups labels)))
  (mapconcat (lambda (label)
               (format "%s = \"%s\"\n" label (downcase label)))
             distinct
             ""))
|
|
#+end_src
|
|
|
|
#+name: agent_classifier
|
|
#+begin_src emacs-lisp :var tbl=agent_classifications
|
|
;; For every (SUBSTRING ENUM) row of TBL, in table order, emit a Python `if`
;; statement that returns the enum member when SUBSTRING occurs in
;; `user_agent`.
(mapconcat
 (pcase-lambda (`(,needle ,label))
   (concat "if '" needle "' in user_agent:" "\n"
           " return cls." label "\n"))
 tbl
 "")
|
|
#+end_src
|
|
|
|
** File-backed HTML/Atom cache
|
|
:PROPERTIES:
|
|
:ID: 20240305T122458.841243
|
|
:ROAM_ALIASES: "arcology.file_cache.str_file_cache decorator"
|
|
:END:
|
|
|
|
I got away with using =functools.lru_cache= with the FastAPI prototype because uvicorn was single-process, but now we're deploying a WSGI app on multi-process =gunicorn= so the memory that the =lru_cache= writes to is not shared between the processes[fn:1:Maybe some day the GIL won't get in the way, alas]. I don't feel that getting the Arcology to work as ASGI Django is worth the trouble; there would be too many multi-colored functions duplicated between the sync workers and the async workers.
|
|
|
|
There are currently a handful of hot cache points in the code-base, and they're all caching big huge strings. Django's [[https://docs.djangoproject.com/en/5.0/topics/cache/][cache framework]] solves all of this handily, but it doesn't provide a memoizing decorator. It's easy enough to write our own, let's see:
|
|
|
|
I want to do this:
|
|
|
|
#+begin_src python
|
|
from arcology.cache_decorator import cache
|
|
|
|
@cache(key_prefix="local_test")
|
|
def gimme(hk):
|
|
return "hello, world!"
|
|
|
|
gimme(1)
|
|
#+end_src
|
|
|
|
Writing a wrapper like this is sort of funny to look at, so let's step through it.
|
|
|
|
Consider the =@cache(key_prefix="local_test")= invocation above.
|
|
- That calls the outer-most function =cache= below, which returns the un-evaluated function =return_decoration= with some configuration variables in-scope.
|
|
- The decorator system then invokes *that* function, passing the =gimme= function in to it
|
|
- *that* returns a =wrapper= function when evaluated which is the thing that is actually invoked when =gimme(1)= is invoked.
|
|
- The inner wrapper function calculates a cache key similarly to =functools.lru_cache= and checks the Django cache for a value under that key; on a hit it returns the cached value, otherwise it calls the original =gimme= function, stores the result, and returns it.
|
|
|
|
If it makes more sense, it may be helpful to think that the =@= in the code is evaluating the function returned by the statement after. If the statement is a naked function, it'll just evaluate it, but if you say =@cache()= it will decorate =gimme= with the return value of =cache()=, which is another wrapper function.
|
|
|
|
All this nesting is necessary to pass arguments in to the decorator, and to have access to the inner function's arguments to calculate the hash key.
|
|
|
|
#+begin_src python :tangle arcology/cache_decorator.py
|
|
import pathlib
|
|
from django.core.cache import caches
|
|
|
|
import logging
|
|
logger = logging.getLogger(__name__)
|
|
|
|
def cache(key_prefix="", cache_connection="default", expire_secs=600):
    """Build a decorator that memoizes a function's result in a Django cache.

    Args:
        key_prefix: text placed before the argument hash in every cache key;
            use a distinct prefix per decorated function.
        cache_connection: alias of the Django cache to use, looked up in
            ``django.core.cache.caches`` on every call.
        expire_secs: timeout, in seconds, given to the cache when storing.

    Returns:
        A decorator. The decorated function must be called with hashable
        arguments. A result of ``None`` looks the same as a cache miss, so
        such a function is re-run on every call.
    """
    import functools  # local import keeps this block self-contained

    def return_decoration(func):

        @functools.wraps(func)  # keep the wrapped function's name and docstring
        def wrapper(*args, **kwargs):
            backend = caches[cache_connection]

            # Positional arguments followed by flattened keyword name/value
            # pairs form the key material.
            key = args
            for name, value in kwargs.items():
                key += (name, value)
            # NOTE(review): hash() of str values is salted per interpreter
            # start unless PYTHONHASHSEED is fixed, so keys are only stable
            # between processes that share a seed — confirm that is acceptable.
            cache_key = f"{key_prefix}/{hash(key)}"

            ret = backend.get(cache_key)
            if ret is None:
                logger.debug("cache_miss %s", cache_key)
                ret = func(*args, **kwargs)
                backend.set(cache_key, ret, expire_secs)
            else:
                logger.debug("cache_hit %s", cache_key)
            return ret

        return wrapper
    return return_decoration
|
|
#+end_src
|
|
|
|
*** NEXT I need to make sure to write some code to do cache-invalidation before it becomes a problem, too...
|
|
|
|
could also just use [[https://www.man7.org/linux/man-pages/man8/systemd-tmpfiles.8.html][=systemd-tmpfiles=]]..!
|