# Source: arcology/roam/core.py (listed as 95 lines, 3.5 KiB, Python)

# [[file:../roam.org::*Parsing and Persisting an org-mode document][Parsing and Persisting an org-mode document:1]]
from typing import Optional
from django.conf import settings
from django.db import transaction
from django.utils.module_loading import import_string
import arroyo.arroyo_rs as native
from roam.models import File, Keyword
import logging
logger = logging.getLogger(__name__)
# logger.setLevel(logging.DEBUG)
@transaction.atomic
def persist_one_file(doc: native.Document) -> File | None:
    """Persist an already-parsed document and, if published, its relations.

    The ``File`` and ``Keyword`` rows are always created. The extractors
    listed in ``settings.ARCOLOGY_EXTRACTORS`` only run when the document
    carries at least one ``ARCOLOGY_KEY`` keyword. The whole function runs
    under ``transaction.atomic``.

    Returns the object produced by ``File.create_from_arroyo``; it is
    refreshed from the database after the extractors have run.

    NOTE: the module structure was ported from an earlier sqlmodel-based
    implementation and may be re-homed at some point.
    """
    # Keywords go into the database unconditionally so that other Arroyo
    # generators can work with unpublished documents.
    file_record = File.create_from_arroyo(doc)
    Keyword.create_from_arroyo(doc)

    # A document counts as Arcology-published when it has an ARCOLOGY_KEY.
    arcology_keys = list(doc.collect_keywords("ARCOLOGY_KEY"))
    if not arcology_keys:
        logger.debug(f"{doc.path} is not published, skipping roam relations.\n")
        return file_record

    # Published page: call every create_from_arroyo builder and persist
    # the objects it produces.
    for model_name in settings.ARCOLOGY_EXTRACTORS:
        logger.debug(f"running {model_name}.create_from_arroyo")
        import_string(model_name).create_from_arroyo(doc)
    file_record.refresh_from_db()
    return file_record
@transaction.atomic
def arroyo_persist_one_file(doc: native.Document):
    """Run every extractor in ``settings.ARROYO_EXTRACTORS`` against *doc*.

    ``ARROYO_EXTRACTORS`` is iterated as a mapping of name to dotted import
    path; each imported object has its ``create_from_arroyo`` called with
    the document. The loop runs under ``transaction.atomic``.
    """
    extractors = settings.ARROYO_EXTRACTORS
    for _name, model_name in extractors.items():
        logger.debug(f"{_name}: {doc.path} time to go axis")
        import_string(model_name).create_from_arroyo(doc)
# Parsing and Persisting an org-mode document:1 ends here
# [[file:../roam.org::*Parsing and Persisting an org-mode document][Parsing and Persisting an org-mode document:2]]
def should_file_persist(path: str) -> tuple[bool, bool, Optional[native.Document]]:
    """Decide whether the file at *path* needs to be (re)persisted.

    Returns a 3-tuple ``(in_db, needs_update, doc)``:

    - ``in_db``: whether ``is_file_updated`` found a ``File`` for the path.
    - ``needs_update``: whether the file is new or out of date.
    - ``doc``: the parsed document when ``needs_update`` is true, else None.

    Possible results:

    - ``(False, True, doc)``: no ``File`` exists for the path yet.
    - ``(True, True, doc)``: a ``File`` exists but is out of date.
    - ``(True, False, None)``: a ``File`` exists and is unaltered.
    - ``(False, False, None)``: ``native.InvalidDocError`` was raised, i.e.
      the document can't be parsed. This is returned even when a ``File``
      already exists for the path.
    """
    try:
        ifu = is_file_updated(path)
        if ifu is False:
            # Existing and unaltered: nothing to parse.
            return (True, False, None)
        # Either unknown (None) or out of date (True); both need a parse.
        doc = parse_doc(path)
    except native.InvalidDocError:
        # The document can't be parsed.
        return (False, False, None)
    # in_db is true only for the "existing but out of date" case.
    return (ifu is not None, True, doc)
# Parsing and Persisting an org-mode document:2 ends here
# [[file:../roam.org::*Parsing and Persisting an org-mode document][Parsing and Persisting an org-mode document:3]]
def is_file_updated(path: str) -> Optional[bool]:
    """Report whether the stored ``File`` for *path* says its hash changed.

    Returns None when no ``File`` with that path exists; otherwise the
    truthiness of that record's ``hash_updated()`` as a plain bool.
    """
    try:
        return bool(File.objects.get(path=path).hash_updated())
    except File.DoesNotExist:
        return None
# Parsing and Persisting an org-mode document:3 ends here
# [[file:../roam.org::*Parsing and Persisting an org-mode document][Parsing and Persisting an org-mode document:4]]
from functools import cache
# XXX: the cache is keyed on the path alone; it may need the doc hash in the cache key later on.
@cache
def parse_doc(path: str) -> native.Document:
    """Parse the file at *path* with the native parser.

    Results are memoised by ``functools.cache`` on the path argument, so a
    repeated call with the same path returns the first result without
    re-reading the file.
    """
    document = native.parse_file(path)
    return document
# Parsing and Persisting an org-mode document:4 ends here