Compare commits

...

3 Commits

Author SHA1 Message Date
Ryan Rix 0b6b7007d1 random crapo 2024-03-13 14:05:46 -07:00
Ryan Rix e3cd5dce42 re-order agent classification checks so that browsers match less 2024-03-13 14:04:57 -07:00
Ryan Rix 49c9c9e0b9 update arroyo to fix subheading exports 2024-03-13 14:04:06 -07:00
10 changed files with 55 additions and 36 deletions

View File

@ -3,8 +3,11 @@
:END:
#+TITLE: The Arcology Project: Django Edition Landing Page
#+FILETAGS: :Project:README:
#+ARCOLOGY_ALLOW_CRAWL: t
#+ARCOLOGY_KEY: arcology/index
* The Arcology Project
[[id:1d917282-ecf4-4d4c-ba49-628cbb4bb8cc][The Arcology Project]] is the publishing platform and social layer of my [[id:60f710b2-6a1f-44be-bc13-dfe01e46d4e3][Concept Operating System]]'s [[id:knowledge_base][Knowledge Management]] system. It is a web publishing platform built to present my literate programming platform and my public knowledge.

View File

@ -1206,7 +1206,6 @@ The FastaAPI site had a "boredom mode" which would disable fonts and colors beca
</html>
#+end_src
*** CSS
:PROPERTIES:
:ID: 20231229T164608.815737
@ -1274,6 +1273,10 @@ a {
a::visited {
color: var(--secondary);
}
code {
font-style: normal;
}
#+end_src
There are per-site CSS in [[id:20231229T164611.256424][The Arcology's Site List]].

View File

@ -10,7 +10,6 @@ class AgentClassification(str, Enum):
NO_UA = "no-ua"
UNKNOWN = "unknown"
INTERNAL = "internal"
BROWSER = "browser"
MATRIX = "matrix"
APP = "app"
FEDIVERSE = "fediverse"
@ -18,6 +17,7 @@ class AgentClassification(str, Enum):
BOT = "bot"
AUTOMATION = "automation"
CRAWLER = "crawler"
BROWSER = "browser"
def __str__(self):
@ -34,18 +34,6 @@ class AgentClassification(str, Enum):
return cls.INTERNAL
if 'feediverse' in user_agent:
return cls.INTERNAL
if 'Chrome/' in user_agent:
return cls.BROWSER
if 'Firefox/' in user_agent:
return cls.BROWSER
if 'DuckDuckGo/' in user_agent:
return cls.BROWSER
if 'Safari/' in user_agent:
return cls.BROWSER
if 'Opera/' in user_agent:
return cls.BROWSER
if 'ddg_android/' in user_agent:
return cls.BROWSER
if 'Synapse' in user_agent:
return cls.MATRIX
if 'Element' in user_agent:
@ -128,6 +116,18 @@ class AgentClassification(str, Enum):
return cls.CRAWLER
if 'Sogou web spider/' in user_agent:
return cls.CRAWLER
if 'Chrome/' in user_agent:
return cls.BROWSER
if 'Firefox/' in user_agent:
return cls.BROWSER
if 'DuckDuckGo/' in user_agent:
return cls.BROWSER
if 'Safari/' in user_agent:
return cls.BROWSER
if 'Opera/' in user_agent:
return cls.BROWSER
if 'ddg_android/' in user_agent:
return cls.BROWSER
logger.warn(f"Unknown User-Agent: {user_agent}")

View File

@ -84,7 +84,7 @@ LOGGING = {
},
"arcology.cache_decorator": { # left as an example to change later.
"handlers": ["console"],
"level": "INFO",
"level": "DEBUG",
}
},
"root": {

View File

@ -129,4 +129,8 @@ a {
a::visited {
color: var(--secondary);
}
code {
font-style: normal;
}
/* CSS:4 ends here */

View File

@ -308,7 +308,7 @@ LOGGING = {
},
"arcology.cache_decorator": { # left as an example to change later.
"handlers": ["console"],
"level": "INFO",
"level": "DEBUG",
}
},
"root": {

View File

@ -6,11 +6,11 @@
"nixpkgs": "nixpkgs"
},
"locked": {
"lastModified": 1708999083,
"narHash": "sha256-BEbba8BkuA5GA8LOhXT6kSX+q9hbdp137lTEgb7iaP8=",
"lastModified": 1710219899,
"narHash": "sha256-IGWuukeJ7egGscXKDAksdPCdftqnQfsHvzQMSYq/Q84=",
"ref": "refs/heads/main",
"rev": "1c8c06702e1ad6145bed80f9ad9c3ec4b7073eda",
"revCount": 160,
"rev": "1b7e71d9e6b459fbdddcd51b04bda20085e90959",
"revCount": 161,
"type": "git",
"url": "https://code.rix.si/rrix/arroyo"
},
@ -42,11 +42,11 @@
"systems": "systems_2"
},
"locked": {
"lastModified": 1705309234,
"narHash": "sha256-uNRRNRKmJyCRC/8y1RqBkqWBLM034y4qN7EprSdmgyA=",
"lastModified": 1710146030,
"narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "1ef2e671c3b0c19053962c07dbda38332dcebf26",
"rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
"type": "github"
},
"original": {

View File

@ -516,6 +516,9 @@ class Link(EMOM('link'), models.Model):
def __repr__(self) -> str:
return f"<Link (from: {self.source_heading_id}, to: {self.dest_heading_id}, text: {self.title})>"
def __str__(self) -> str:
return self.__repr__()
def to_backlink_html(self) -> str:
try:
h = self.source_heading

View File

@ -236,6 +236,9 @@ class Link(EMOM('link'), models.Model):
def __repr__(self) -> str:
return f"<Link (from: {self.source_heading_id}, to: {self.dest_heading_id}, text: {self.title})>"
def __str__(self) -> str:
return self.__repr__()
def to_backlink_html(self) -> str:
try:
h = self.source_heading

View File

@ -316,12 +316,6 @@ This =AgentClassification= enumeration class can take a User Agent header and ma
|-------------------------------+-------------|
| prometheus | INTERNAL |
| feediverse | INTERNAL |
| Chrome/ | BROWSER |
| Firefox/ | BROWSER |
| DuckDuckGo/ | BROWSER |
| Safari/ | BROWSER |
| Opera/ | BROWSER |
| ddg_android/ | BROWSER |
| Synapse | MATRIX |
| Element | MATRIX |
| SubwayTooter | APP |
@ -363,6 +357,12 @@ This =AgentClassification= enumeration class can take a User Agent header and ma
| scaninfo@paloaltonetworks.com | CRAWLER |
| SEOlyt/ | CRAWLER |
| Sogou web spider/ | CRAWLER |
| Chrome/ | BROWSER |
| Firefox/ | BROWSER |
| DuckDuckGo/ | BROWSER |
| Safari/ | BROWSER |
| Opera/ | BROWSER |
| ddg_android/ | BROWSER |
#+begin_src python :tangle arcology/agent_utils.py :noweb yes
from __future__ import annotations
@ -422,9 +422,9 @@ class AgentClassification(str, Enum):
I got away with using =functools.lru_cache= with the FastAPI prototype because uvicorn was single-process, but now we're deploying a WSGI app on multi-process =gunicorn=, so the memory that the =lru_cache= writes to is not shared between the processes[fn:1:Maybe some day the GIL won't get in the way, alas]. I don't feel that trying to get the Arcology to work as ASGI Django is worth the trouble; there would be too many multi-colored functions duplicated between the sync workers and the async workers.
There are currently four invocations of =lru_cache= in this code-base they're all caching big huge strings. It's easier perhaps to swap in a thing which writes those HTML strings to files. The call-sites all have the source-file's =sha256= sum so that those =lru_cache= functions have a cache-breaking key, this can still be a cache-breaking key, just on the filesystem instead. God-speed to whoever deploys the Arcology to a multi-system Kubernetes cluster.
There are currently a handful of hot cache points in the code-base; they're all caching big huge strings. Django's [[https://docs.djangoproject.com/en/5.0/topics/cache/][cache framework]] solves all of this handily, but it doesn't provide a memoizing decorator. It's easy enough to write our own; let's see:
So now you can do this:
I want to do this:
#+begin_src python
from arcology.cache_decorator import cache
@ -436,14 +436,17 @@ def gimme(hk):
gimme(1)
#+end_src
Writing a wrapper like this is sort of funny to look at.
Writing a wrapper like this is sort of funny to look at, so let's step through it.
Consider the =@cache()= invocation above.
- That calls the outer-most function =str_file_cache= below, which returns the un-evaluated function =return_decoration=
- The decorator system then invokes *that*, passing the =gimme= function in to it
- *that* returns a =wrapper= function which is the thing that is invoked when =gimme(1)= is invoked.
- That calls the outer-most function =cache= below, which returns the un-evaluated function =return_decoration= with some configuration variables in-scope.
- The decorator system then invokes *that* function, passing the =gimme= function in to it
- *that* returns a =wrapper= function when evaluated which is the thing that is actually invoked when =gimme(1)= is invoked.
- The inner wrapper function calculates a cache key similarly to =functools.lru_cache= and checks the Django cache for a value matching that key, returning it if present, or otherwise storing and returning the value of the original =gimme= function.
All this nesting is necessary to keep pass arguments in to the decorator, and to have access to the inner function's arguments to calculate the hash key.
If it makes more sense, it may be helpful to think of the =@= in the code as evaluating the expression that follows it and using the result as the decorator. If that expression is a naked function, it'll just apply that function to =gimme=, but if you say =@cache()= it will first call =cache()= and then decorate =gimme= with its return value, which is another wrapper function.
All this nesting is necessary to pass arguments in to the decorator, and to have access to the inner function's arguments to calculate the hash key.
#+begin_src python :tangle arcology/cache_decorator.py
import pathlib