253 lines
8.6 KiB
Org Mode
253 lines
8.6 KiB
Org Mode
:PROPERTIES:
|
|
:ID: arcology/routing
|
|
:END:
|
|
#+TITLE: Arcology Routing Logic
|
|
#+filetags: :Project:Arcology:
|
|
|
|
#+ARCOLOGY_KEY: arcology/routing
|
|
#+ARCOLOGY_ALLOW_CRAWL: t
|
|
#+AUTO_TANGLE: t
|
|
|
|
This is a way to abstract URL logic between the development domain and the production domain without so much fuss. It's a pair of modules that have an identical export. Each one has:
|
|
- =decorate_app(app: FastAPI) -> FastAPI= - this is called in the FastAPI server setup to inject different routes depending on the =arcology_env= [[id:20220117T162655.535047][Configuration]]
|
|
|
|
I've considered putting URL generation in these and may yet do so but that is in the [[id:arcology/arroyo/key][=arcology.key.ArcologyKey=]]. Probably a mistake.
|
|
|
|
* Shared Functions
|
|
:PROPERTIES:
|
|
:ID: 20220225T211344.682539
|
|
:ROAM_ALIASES: render_page_from_key
|
|
:END:
|
|
|
|
Load up the template and the imports that =render_page_from_key= uses...
|
|
|
|
#+begin_src python :tangle arcology/routing/util.py :mkdirp yes
|
|
from fastapi import Request
|
|
from sqlmodel import Session
|
|
from fastapi.templating import Jinja2Templates
|
|
from typing import Optional
|
|
|
|
from arcology.arroyo import Page, engine
|
|
import arcology.html as html
|
|
|
|
from arcology.config import get_settings, Environment
|
|
|
|
templ_dir = "arcology/templates"
|
|
templates = Jinja2Templates(directory=templ_dir)
|
|
#+end_src
|
|
|
|
This thing is responsible for loading the [[id:arcology/arroyo/page][Arcology Page]], generating an HTML response, and packaging it into a FastAPI response format. It does a lot, and the fact that it's pulling modules from all over the code base gives me great anxiety! This is probably something to really consider refactoring, or putting better abstractions into the Page module... or maybe not.
|
|
|
|
#+begin_src python :tangle arcology/routing/util.py :mkdirp yes
|
|
import asyncio
|
|
|
|
from fastapi import HTTPException
|
|
|
|
from arcology.feeds import make_feed_entries
|
|
|
|
async def render_page_from_key(request: Request, key: str, engine, site) -> Optional[templates.TemplateResponse]:
    """Render the page stored under ``key`` with the ``page.html.j2`` template.

    The page is looked up with ``Page.from_key``; its document and backlink
    HTML are passed through ``html.rewrite_html``; feed entries are collected
    for the page's own site; and everything is handed to the template.

    Raises:
        HTTPException: 404 when ``Page.from_key`` finds nothing for ``key``.
    """
    with Session(engine) as session:
        page = Page.from_key(key, session)
        if page is None:
            raise HTTPException(status_code=404, detail="Page not found.")

        # NOTE: each task is awaited as soon as it is created, so the two
        # renders run one after the other rather than concurrently.
        raw_document = await asyncio.create_task(page.document_html())
        raw_backlink = await asyncio.create_task(page.backlink_html())

        context = {
            "site": site,
            "page": page,
            "document": html.rewrite_html(raw_document, session),
            "backlink": html.rewrite_html(raw_backlink, session),
            "request": request,
            "feeds": make_feed_entries(page.get_site().key, session),
        }
        # Build the response while the session is still open.
        return templates.TemplateResponse("page.html.j2", context)
|
|
#+end_src
|
|
|
|
* NEXT This routing split between local and prod doesn't work because the routes aren't domain-aware and are very greedy.
|
|
* [#A] Arcology Domain-Aware Routing
|
|
:PROPERTIES:
|
|
:ID: 20220225T175638.482695
|
|
:ROAM_ALIASES: "Arcology FastAPI Edge Endpoint" "Arcology Public Router" "Arcology Domain Router"
|
|
:END:
|
|
:LOGBOOK:
|
|
- State "INPROGRESS" from "NEXT" [2022-02-25 Fri 17:23]
|
|
:END:
|
|
|
|
There are only a handful of routes here, and they're, frankly, janky. A handful of static routes feed functionality in other parts of the site, but most of the heavy-lifting is done below in the =public_router= function...
|
|
|
|
#+begin_src python :tangle arcology/routing/domains.py :noweb yes
|
|
from fastapi import FastAPI, Request
|
|
from sqlmodel import Session
|
|
from fastapi.responses import HTMLResponse, FileResponse, Response
|
|
|
|
from arcology.routing.util import render_page_from_key, templates, crawlable_pages_for_site
|
|
from arcology.feeds import render_feed_from_file
|
|
from arcology.arroyo import engine
|
|
from arcology.key import ArcologyKey
|
|
from arcology.arroyo import Keyword
|
|
from arcology.sigma import sigma
|
|
|
|
|
|
def decorate_app(app: FastAPI) -> FastAPI:
    """Attach the routes defined in this document to ``app`` and return it.

    The body is assembled from the noweb references below when the file is
    tangled; each reference expands to the source blocks carrying the
    matching ``:noweb-ref``.
    """
    <<robots-txt>>

    <<sitemap>>

    <<feedgen>>

    # primary-route declares the catch-all ``/{sub_key:path}`` routes and is
    # expanded last -- presumably so the more specific routes above are
    # registered first; keep this ordering.
    <<primary-route>>

    return app
|
|
#+end_src
|
|
|
|
** GET =/robots.txt=
|
|
:PROPERTIES:
|
|
:ID: arcology/routing/robots.txt
|
|
:ROAM_ALIASES: "Arcology Robots.txt Generator"
|
|
:END:
|
|
|
|
The cache stores whether a Page contains an =#+ARCOLOGY_ALLOW_CRAWL= file property which is *not* set to =nil=. Based on this, it's possible to create a =robots.txt= entry which only exposes pages I explicitly want crawled.
|
|
|
|
#+begin_src python :noweb-ref robots-txt
|
|
import logging
|
|
@app.get("/robots.txt", response_class=FileResponse, name="robots-txt")
async def robots_txt(request: Request):
    """Serve a per-site robots.txt listing the pages that allow crawling.

    The site key is resolved from the request through
    ``ArcologyKey.from_request`` and the crawlable pages come from
    ``crawlable_pages_for_site``; both are rendered by ``robots.txt.j2``.
    """
    with Session(engine) as session:
        site_key = ArcologyKey.from_request(request).site_key
        crawlable_pages = crawlable_pages_for_site(site_key, session)
        # TemplateResponse defaults to text/html; robots.txt has to be
        # served as text/plain, so set the media type explicitly.
        return templates.TemplateResponse(
            "robots.txt.j2",
            dict(
                request=request,
                pages=crawlable_pages,
            ),
            media_type="text/plain",
        )
|
|
#+end_src
|
|
|
|
#+begin_src jinja2 :tangle arcology/templates/robots.txt.j2
|
|
User-agent: GPTBot
|
|
Disallow: /
|
|
|
|
User-agent: ChatGPT-User
|
|
Disallow: /
|
|
|
|
User-agent: Google-Extended
|
|
Disallow: /
|
|
|
|
User-agent: CCBot
|
|
Disallow: /
|
|
|
|
User-Agent: *
|
|
Disallow: /
|
|
Allow: /static/css
|
|
{%- for page in pages %}
|
|
{%- set key = page.get_arcology_key() %}
|
|
{%- if key.rest == "index" %}
|
|
Allow: /$
|
|
{%- else %}
|
|
Allow: /{{key.rest}}$
|
|
{%- endif %}
|
|
{%- endfor %}
|
|
#+end_src
|
|
|
|
this goes in the [[id:20220225T211344.682539][Shared Functions]] module because I'm bad at information architecture.
|
|
|
|
#+begin_src python :tangle arcology/routing/util.py
|
|
from sqlmodel import select, SQLModel, Session
|
|
|
|
import arcology.arroyo as arroyo
|
|
from arcology.parse import *
|
|
|
|
|
|
def crawlable_pages_for_site(site_key: str, session: Session):
    """Return every Page of ``site_key`` whose ``allow_crawl`` is not "nil".

    ``site_key`` is passed through ``print_sexp`` before being compared
    against the ``site`` column.
    """
    belongs_to_site = arroyo.Page.site == print_sexp(site_key)
    crawl_allowed = arroyo.Page.allow_crawl != "nil"
    query = select(arroyo.Page).where(belongs_to_site).where(crawl_allowed)
    return session.exec(query).all()
|
|
#+end_src
|
|
|
|
** GET =/sitemap= and =/sitemap/json=
|
|
|
|
Use [[id:20220711T151820.326251][SigmaJS]] to generate the [[id:arcology/sitemaps][Arcology Sitemap]].
|
|
|
|
#+begin_src python :noweb-ref sitemap
|
|
@app.get("/sitemap/", response_class=HTMLResponse, name="sitemap_page")
async def sitemap(request: Request):
    """Render ``sitemap.html.j2`` for the site resolved from the request."""
    context = {
        "request": request,
        "site": ArcologyKey.from_request(request).site,
    }
    return templates.TemplateResponse("sitemap.html.j2", context)
|
|
|
|
@app.get("/sitemap/json", name="sitemap_json")
async def sitemap_json(request: Request):
    """Return the graph data produced by ``sigma(engine)`` for the sitemap.

    The function has its own name so it does not shadow the ``sitemap``
    HTML handler defined in the same scope; the route path and route name
    are unchanged.
    """
    return sigma(engine)
|
|
#+end_src
|
|
|
|
** GET =/feeds.json=
|
|
|
|
This generates a list of feeds seen by the [[id:arcology/arroyo/feed][arcology.arroyo.Feed]] database module and presents them in a simple JSON list to be used by other automation on my system like my [[id:20230125T143144.011175][Feediverse]] cross-posting bots. This is the same metadata which is used in the [[id:arcology/atom-gen][Arcology Feed Generator]] though all of that is currently using the underlying KVF store rather than the normalized database tables at the moment.
|
|
|
|
#+begin_src python :noweb-ref feedgen
|
|
from arcology.arroyo import Feed, engine
|
|
from sqlmodel import select, SQLModel, Session
|
|
from typing import List
|
|
|
|
def get_feeds(session: Session) -> List[Feed]:
    """Fetch every ``Feed`` row through ``session``."""
    return session.exec(select(Feed)).all()
|
|
|
|
@app.get("/feeds.json", response_model=List[Feed], name="feed-list")
async def feed_list(request: Request):
    """List every feed returned by ``get_feeds``.

    The route declares ``response_model=List[Feed]`` for the returned list.
    """
    with Session(engine) as session:
        return get_feeds(session)
|
|
#+end_src
|
|
|
|
*** NEXT Move =get_feeds= to feeds util module for the function which sticks the <link>s in <head>
|
|
|
|
** GET =/{key}.xml=
|
|
|
|
This is a shortcut in to the public router below for [[id:arcology/atom-gen][Arcology Feed Generator]] files, it's basically a hack to set the =media_type= in the response...
|
|
|
|
#+begin_src python :noweb-ref feedgen
|
|
@app.get("/{sub_key:path}.xml", response_class=Response, name="feed-route")
async def feed_route(request: Request, sub_key: str):
    """Route ``*.xml`` paths through ``public_router`` as Atom.

    The route pattern leaves the ``.xml`` suffix out of ``sub_key``, so it is
    put back before the key is handed on. Whatever ``public_router`` returns
    is wrapped in a ``Response`` with media type ``application/atom+xml``.
    """
    feed_key = f"{sub_key}.xml"
    body = await public_router(request, feed_key)
    return Response(content=body, media_type="application/atom+xml")
|
|
#+end_src
|
|
|
|
** GET =/{key}/?=
|
|
|
|
This does all the heavy lifting:
|
|
|
|
#+begin_src python :noweb-ref primary-route
|
|
@app.get("/{sub_key:path}", response_class=HTMLResponse, name="base-route")
@app.get("/{sub_key:path}/", response_class=HTMLResponse, name="base-route")
async def public_router(request: Request, sub_key: str):
    """Resolve the request to an Arcology key and render its feed or page.

    When the ``host`` header starts with ``localhost`` the key is built
    straight from the URL path; otherwise it is derived from the request by
    ``ArcologyKey.from_request``. A key that has an ``ARCOLOGY_FEED`` keyword
    is rendered as a feed, anything else as a page.
    """
    served_locally = request.headers.get('host', "").startswith('localhost')
    key = ArcologyKey(key=sub_key) if served_locally else ArcologyKey.from_request(request)

    with Session(engine) as session:
        feed_keyword = Keyword.get("ARCOLOGY_FEED", key.key, session)
        if feed_keyword is not None:
            return await render_feed_from_file(request, feed_keyword.filename(), engine, key.site)

    # Not a feed: fall through to the regular page renderer.
    return await render_page_from_key(request, key.key, engine, key.site)
|
|
#+end_src
|
|
|
|
** NEXT better 404 management
|
|
|
|
show a textual page or maybe redirect to duckduckgo search lul
|
|
|
|
** NEXT HEAD =/{key}/=
|
|
|
|
I oughtta implement this to return metadata and whatnot for pages to generate previews on [[id:62538db5-d94a-47c3-9998-086ded91fd88][Fediverse]], Twitter, etc...
|
|
|
|
|
|
but what to do about short description metadata or header image...
|