re-factor HTML re-hydrating, add fc-cloze rewrites

main
Ryan Rix 10 months ago
parent b3d53e6bd6
commit 67c179faa2

@ -251,45 +251,104 @@ import re
from typing import Optional
from arcology.key import id_to_arcology_key, file_to_arcology_key
class HTMLRewriter():
    """
    Base class for regex-driven rewrites of exported HTML.

    Subclasses supply a pattern via `re()` and a per-match callback via
    `replace()`; `do()` applies the pair to a document with `re.sub`.
    """

    def __init__(self, session):
        """
        Store the session and the shared dead-link attribute template.

        `session` is kept as-is on the instance for subclasses to use.
        """
        # Attribute text substituted for links whose target cannot be
        # resolved; `{key}` is filled in with the unresolved identifier.
        self.res_404 = 'href="/404?missing={key}" class="dead-link"'
        self.session = session

    def replace(self, match):
        """
        Return the replacement text for one regex match (subclass hook).
        """
        # `self` is required here: `do()` hands `re.sub` the bound method, so
        # without it the hook would fail with TypeError rather than this.
        raise NotImplementedError()

    def re(self):
        """
        Return the regular expression this rewriter matches (subclass hook).
        """
        raise NotImplementedError()

    def do(self, output_html):
        """
        Apply this rewriter to `output_html` and return the rewritten string.
        """
        # Inside a method body `re` resolves to the module-level `re` import,
        # not to the `re()` method defined on this class.
        return re.sub(self.re(), self.replace, output_html)
#+end_src
Rewriting the HTML is a pretty straightforward affair using [[https://docs.python.org/3/library/re.html#re.sub][re.sub]] with callbacks rather than static replacements. The function which accepts the match object pull the node's [[id:arcology/arroyo/key][=ARCOLOGY_KEY=]], with an optional node-id anchor attached to it. This is then farmed out to =arcology_key_to_url= to be turned in to a URL. In this fashion, each =href= is replaced with a URL that will route to the target page, or a 404 page link with a CSS class attached.
Rewriting the HTML is a pretty straightforward affair using [[https://docs.python.org/3/library/re.html#re.sub][re.sub]] with callbacks rather than static replacements, with some abstraction sprinkled on top in the form of the =HTMLRewriter= superclass defined above. Each implementation of it provides a function which accepts the match object and pulls the node's [[id:arcology/arroyo/key][=ARCOLOGY_KEY=]] with an optional node-id anchor attached to it. This is then farmed out to [[id:arcology/arroyo/key][=arcology_key_to_url=]] (or a similar helper) to be turned into a URL. In this fashion, each =href= is replaced with a URL that will route to the target page, or a 404 page link with a CSS class attached.
#+begin_src python :tangle arcology/html.py
def rewrite_html(input_html: str, session: sqlmodel.Session) -> str:
"""
Run a series of replacement functions on the input HTML and return a new string.
"""
res_404 = 'href="/404?missing={key}" class="dead-link"'
I'm pretty sure this is all quite inefficient but as always I invoke [[id:personal_software_can_be_shitty][Personal Software Can Be Shitty]].
output_html = input_html
So ID links can be rewritten like:
def id_replacement_fn(match):
#+begin_src python :tangle arcology/html.py
class IDReplacementRewriter(HTMLRewriter):
    """
    Rewrite `href="id:..."` attributes into page links.
    """

    def re(self):
        """
        Match an `href` whose target is an `id:` link, capturing the ID.
        """
        return r'href="id:([^"]+)"'

    def replace(self, match):
        """
        Build the replacement attributes for one matched `id:` link.

        Falls back to the dead-link attributes when the key lookup
        returns None.
        """
        node_id = match.group(1)
        arcology_key = id_to_arcology_key(node_id, self.session)
        if arcology_key is not None:
            target_url = arcology_key_to_url(arcology_key)
            return 'class="internal" href="{url}"'.format(url=target_url)
        # No key for this ID: point the link at the 404 page instead.
        return self.res_404.format(key=node_id)
#+end_src
File links can be rewritten like:
def file_replacement_fn(match):
#+begin_src python :tangle arcology/html.py
class FileReplacementRewriter(HTMLRewriter):
    """
    Rewrite `href="file://..."` attributes into page links.
    """

    def re(self):
        """
        Match an `href` whose target is a `file://` link, capturing the path.
        """
        return r'href="file://([^"]+)"'

    def replace(self, match):
        """
        Build the replacement attributes for one matched `file://` link.

        Falls back to the dead-link attributes when there is no path or the
        key lookup returns None.
        """
        file_path = match.group(1)
        arcology_key = None
        # Defensive guard: the capture group is mandatory in the pattern
        # above, so a successful match should always carry a path.
        if file_path is not None:
            arcology_key = file_to_arcology_key(file_path, self.session)
        if arcology_key is None:
            return self.res_404.format(key=file_path)
        target_url = arcology_key_to_url(arcology_key)
        return 'class="file" href="{url}"'.format(url=target_url)
#+end_src
[[id:cce/org-roam][org-roam]] stub links can be rewritten like so. This one is a little wonky because, unlike the other regexen, it doesn't only want to operate on the anchor's attributes: it emits =res_404= and also wants to strip the =roam:= text from the =[[roam:Stub]]= links.
#+begin_src python :tangle arcology/html.py
class RoamReplacementRewriter(HTMLRewriter):
    """
    Turn `roam:` stub links into dead links and drop the `roam:` text prefix.
    """

    def re(self):
        """
        Match a `roam:` href together with the tag's closing `>` and the
        literal `roam:` that starts the link text, capturing the stub name.
        """
        return r'href="roam:([^"]+)">roam:'

    def replace(self, match):
        """
        Return dead-link attributes for the stub, followed by the tag's `>`.
        """
        stub_name = match.group(1)
        dead_link_attrs = self.res_404.format(key=stub_name)
        # The pattern consumed the closing `>` along with the `roam:` text
        # prefix; only the `>` is put back, which strips the prefix.
        return dead_link_attrs + ">"
#+end_src
I also make some quality-of-life rewrites of my [[id:2e31b385-a003-4369-a136-c6b78c0917e1][org-fc]] cloze cards into simple =<span>= elements with the hint embedded in their =title= attribute.
#+begin_src python :tangle arcology/html.py
class FCClozeReplacementRewriter(HTMLRewriter):
    """
    Rewrite cloze markup of the form `{{main}{hint}@N}` into a `<span>`.

    The main text becomes the span's content and the hint, if any, becomes
    its `title` attribute.
    """

    def replace(self, match):
        """
        Build the `<span class='fc-cloze'>` element for one matched cloze.

        Missing groups are treated as empty strings.
        """
        main = match.group(1) or ""
        hint = match.group(2) or ""
        # Drop any markup inside the hint; it is going into an attribute.
        hint = re.sub(r"</?[^>]+>", "", hint)
        # The attribute below is single-quoted, so a bare apostrophe in the
        # hint would end it early and corrupt the element.
        hint = hint.replace("'", "&#39;")
        return f"<span class='fc-cloze' title='{hint}'>{main}</span>"

    def re(self):
        """
        Match a cloze: main text, an optional hint, and a numeric `@N` suffix.
        """
        return r'{{([^}]+)}{?([^}]+)?}?@[0-9]+}'
#+end_src
def roam_replacement_fn(match):
return res_404.format(key=match.group(1)) + "\">"
Invoke all these in a simple little harness:
output_html = re.sub(r'href="roam:([^"]+)" >roam:"', roam_replacement_fn, output_html)
#+begin_src python :tangle arcology/html.py
def rewrite_html(input_html: str, session: sqlmodel.Session) -> str:
    """
    Pass the input HTML through each rewriter in turn and return the result.
    """
    # Order matters only in that each rewriter sees the previous one's output.
    rewriter_classes = (
        IDReplacementRewriter,
        FileReplacementRewriter,
        RoamReplacementRewriter,
        FCClozeReplacementRewriter,
    )
    rewritten = input_html
    for rewriter_class in rewriter_classes:
        rewritten = rewriter_class(session).do(rewritten)
    return rewritten
#+end_src
@ -579,7 +638,11 @@ Links in the [[id:cce/org-roam][org-roam]] database have a useful =type= column.
(let ((dest-file (caar (org-roam-db-query
[:select file :from nodes
:where (= id $s1)]
dest))))
dest)))
(source-title (caar (org-roam-db-query
[:select title :from nodes
:where (= id $s1)]
source))))
(when (and dest-file (arcology--published-page? dest-file)
(arroyo-db-query [:insert :into arcology-links
:values $v1]
@ -591,6 +654,15 @@ Links in the [[id:cce/org-roam][org-roam]] database have a useful =type= column.
(t nil))))
#+end_src
** INPROGRESS =source_title= should populate with the immediate parent header's title, not level 0
:LOGBOOK:
- State "INPROGRESS" from "NEXT" [2022-08-05 Fri 14:03]
:END:
It's passed in to =arroyo-arcology--insert-links= [[id:arcology/arroyo][Below]]. Not sure the better way to do that -- query =org-roam-db= in the insert function itself? good enough for now prolly.
deal with the title being fetched and populated in that function below if necessary.
* Arcology Nodes
:PROPERTIES:
:ID: arcology/arroyo/node

@ -18,40 +18,79 @@ from typing import Optional
from arcology.key import id_to_arcology_key, file_to_arcology_key
def rewrite_html(input_html: str, session: sqlmodel.Session) -> str:
"""
Run a series of replacement functions on the input HTML and return a new string.
"""
res_404 = 'href="/404?missing={key}" class="dead-link"'
class HTMLRewriter():
    """
    Base class for regex-driven rewrites of exported HTML.

    Subclasses supply a pattern via `re()` and a per-match callback via
    `replace()`; `do()` applies the pair to a document with `re.sub`.
    """

    def __init__(self, session):
        """
        Store the session and the shared dead-link attribute template.

        `session` is kept as-is on the instance for subclasses to use.
        """
        # Attribute text substituted for links whose target cannot be
        # resolved; `{key}` is filled in with the unresolved identifier.
        self.res_404 = 'href="/404?missing={key}" class="dead-link"'
        self.session = session

    def replace(self, match):
        """
        Return the replacement text for one regex match (subclass hook).
        """
        # `self` is required here: `do()` hands `re.sub` the bound method, so
        # without it the hook would fail with TypeError rather than this.
        raise NotImplementedError()

    def re(self):
        """
        Return the regular expression this rewriter matches (subclass hook).
        """
        raise NotImplementedError()

    def do(self, output_html):
        """
        Apply this rewriter to `output_html` and return the rewritten string.
        """
        # Inside a method body `re` resolves to the module-level `re` import,
        # not to the `re()` method defined on this class.
        return re.sub(self.re(), self.replace, output_html)
class IDReplacementRewriter(HTMLRewriter):
    """
    Rewrite `href="id:..."` attributes into page links.
    """

    def re(self):
        """
        Match an `href` whose target is an `id:` link, capturing the ID.
        """
        return r'href="id:([^"]+)"'

    def replace(self, match):
        """
        Build the replacement attributes for one matched `id:` link.

        Falls back to the dead-link attributes when the key lookup
        returns None.
        """
        node_id = match.group(1)
        arcology_key = id_to_arcology_key(node_id, self.session)
        if arcology_key is not None:
            target_url = arcology_key_to_url(arcology_key)
            return 'class="internal" href="{url}"'.format(url=target_url)
        # No key for this ID: point the link at the 404 page instead.
        return self.res_404.format(key=node_id)
def file_replacement_fn(match):
class FileReplacementRewriter(HTMLRewriter):
    """
    Rewrite `href="file://..."` attributes into page links.
    """

    def re(self):
        """
        Match an `href` whose target is a `file://` link, capturing the path.
        """
        return r'href="file://([^"]+)"'

    def replace(self, match):
        """
        Build the replacement attributes for one matched `file://` link.

        Falls back to the dead-link attributes when there is no path or the
        key lookup returns None.
        """
        file_path = match.group(1)
        arcology_key = None
        # Defensive guard: the capture group is mandatory in the pattern
        # above, so a successful match should always carry a path.
        if file_path is not None:
            arcology_key = file_to_arcology_key(file_path, self.session)
        if arcology_key is None:
            return self.res_404.format(key=file_path)
        target_url = arcology_key_to_url(arcology_key)
        return 'class="file" href="{url}"'.format(url=target_url)
class RoamReplacementRewriter(HTMLRewriter):
    """
    Turn `roam:` stub links into dead links and drop the `roam:` text prefix.
    """

    def re(self):
        """
        Match a `roam:` href together with the tag's closing `>` and the
        literal `roam:` that starts the link text, capturing the stub name.
        """
        return r'href="roam:([^"]+)">roam:'

    def replace(self, match):
        """
        Return dead-link attributes for the stub, followed by the tag's `>`.
        """
        stub_name = match.group(1)
        dead_link_attrs = self.res_404.format(key=stub_name)
        # The pattern consumed the closing `>` along with the `roam:` text
        # prefix; only the `>` is put back, which strips the prefix.
        return dead_link_attrs + ">"
class FCClozeReplacementRewriter(HTMLRewriter):
    """
    Rewrite cloze markup of the form `{{main}{hint}@N}` into a `<span>`.

    The main text becomes the span's content and the hint, if any, becomes
    its `title` attribute.
    """

    def replace(self, match):
        """
        Build the `<span class='fc-cloze'>` element for one matched cloze.

        Missing groups are treated as empty strings.
        """
        main = match.group(1) or ""
        hint = match.group(2) or ""
        # Drop any markup inside the hint; it is going into an attribute.
        hint = re.sub(r"</?[^>]+>", "", hint)
        # The attribute below is single-quoted, so a bare apostrophe in the
        # hint would end it early and corrupt the element.
        hint = hint.replace("'", "&#39;")
        return f"<span class='fc-cloze' title='{hint}'>{main}</span>"

    def re(self):
        """
        Match a cloze: main text, an optional hint, and a numeric `@N` suffix.
        """
        return r'{{([^}]+)}{?([^}]+)?}?@[0-9]+}'
def rewrite_html(input_html: str, session: sqlmodel.Session) -> str:
    """
    Pass the input HTML through each rewriter in turn and return the result.
    """
    # Order matters only in that each rewriter sees the previous one's output.
    rewriter_classes = (
        IDReplacementRewriter,
        FileReplacementRewriter,
        RoamReplacementRewriter,
        FCClozeReplacementRewriter,
    )
    rewritten = input_html
    for rewriter_class in rewriter_classes:
        rewritten = rewriter_class(session).do(rewritten)
    return rewritten

Loading…
Cancel
Save