31 KiB
Arcology Roam Models
- Org-Roam Caching Models
- Parsing and Persisting an org-mode document
- NEXT split up the migration
- NEXT tests
- Admin
- Views
- The Rest
- NEXT move this in to Arroyo Systems Management, along with The Arroyo Generators.
at the top here, describe the class layout and the overall usage of this module.
Org-Roam Caching Models
from __future__ import annotations
import hashlib
from typing import List, Set
from django.db import models
from django.conf import settings
from django.utils.module_loading import import_string
from django_prometheus.models import ExportModelOperationsMixin as EMOM
import arroyo.arroyo_rs as native
import logging
logger = logging.getLogger(__name__)
File
def calculate_hash(path: str) -> str:
    """Return the SHA-256 hex digest of the file at *path*.

    The file is read in fixed-size chunks so that hashing a large file does
    not require holding its whole content in memory.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for chunk in iter(lambda: f.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
class File(EMOM('file'), models.Model):
    """An org-mode file known to the cache, keyed by its path.

    ``digest`` stores the hash computed by ``calculate_hash`` when the file
    was last persisted; ``hash_updated`` compares it with the file on disk.
    """

    path = models.CharField(max_length=512, primary_key=True)
    digest = models.CharField(max_length=512)

    # inbound_files = models.ManyToManyField(
    #     "File",
    #     through="Link",
    #     related_name="outbound_files",
    #     through_fields=("dest_file", "source_file")
    # )

    def hash_updated(self) -> bool:
        """Return True when the on-disk content no longer matches ``digest``."""
        file_hash = calculate_hash(self.path)
        logger.debug(f"old: {self.digest} new: {file_hash}")
        return file_hash != self.digest

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> File:
        """Create or refresh the File row for *doc* and return it.

        ``path`` is the primary key, so the lookup is done on ``path`` alone
        and the digest is written through ``defaults``. Looking up on both
        ``path`` and ``digest`` (as ``get_or_create`` did) would miss an
        existing row whose content changed and then attempt to insert a
        second row with the same primary key.
        """
        obj, _created = cls.objects.update_or_create(
            path=doc.path,
            defaults={"digest": calculate_hash(doc.path)},
        )
        return obj
roam.models.File
Testing
from django.test import TestCase
from django.db.utils import IntegrityError
from roam.models import File
from django.conf import settings
from arroyo import parse_file
import subprocess
from roam.models import File
class RoamFileTest(TestCase):
    """Tests for roam.models.File, driven by this repository's README.org."""

    def setUp(self):
        # super().setUp()
        self.expected_path = str(settings.BASE_DIR / "./README.org")
        self.native = parse_file(self.expected_path)
        # The reference digest comes from the external sha256sum tool rather
        # than from hashlib. It is invoked with an argv list instead of a
        # shell pipeline so that a checkout path containing spaces or shell
        # metacharacters cannot break (or be interpreted by) the command.
        # sha256sum prints "<digest>  <path>"; the first field is the digest.
        self.expected_hash = (
            subprocess.check_output(["sha256sum", self.expected_path])
            .decode("UTF-8")
            .split()[0]
        )
Test create_from_arroyo: parse this document and see if we can get a File out of it.
def test_cfa(self):
    """create_from_arroyo stores a File carrying the expected path and digest."""
    File.create_from_arroyo(self.native)
    stored = File.objects.first()
    # ensure object is instantiated properly
    self.assertEqual(stored.path, self.expected_path)
    self.assertEqual(stored.digest, self.expected_hash)
Test the hash_updated function: synthesize a File object, check the behavior of hash_updated, and check that calculate_hash works.
def test_hash_updated(self):
    """hash_updated reports a stale digest and accepts the current one."""
    # An unsaved instance is enough: hash_updated only reads path and digest.
    candidate = File(path=self.expected_path, digest="12345")
    self.assertNotEqual(candidate.digest, self.expected_hash)
    self.assertEqual(candidate.hash_updated(), True)

    candidate.digest = self.expected_hash
    self.assertEqual(candidate.hash_updated(), False)
Keyword
class Keyword(EMOM('keyword'), models.Model):
    """A keyword/value pair attached to a File."""

    class Meta:
        # XXX: how do i get out of this? i think i just have to assume
        # that there will be duplicate, and these cannot be unique, have
        # to be filter()'d for
        unique_together = (("path", "keyword", "value"),)

    path = models.ForeignKey(File, on_delete=models.CASCADE, db_column="path")
    keyword = models.CharField(max_length=512)
    value = models.CharField(max_length=512)

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> List[Keyword]:
        """Persist the keywords of *doc* named in settings.ROAM_ALLOWED_KEYWORDS.

        Returns the Keyword rows (fetched or newly created) in document order.
        """
        persisted = []
        for kw in doc.keywords:
            if kw.keyword not in settings.ROAM_ALLOWED_KEYWORDS:
                continue
            # An unsaved File carrying only its primary key stands in for the
            # related row, so no File query is issued here.
            row, _created = cls.objects.get_or_create(
                path=File(path=kw.file),
                keyword=kw.keyword,
                value=kw.value,
            )
            persisted.append(row)
        return persisted
roam.models.Keyword
Testing
- test the
path
ForeignKey
- test common queries from elsewhere in the codebase (and probably slurp those in to instance methods along the way)
from roam.models import Keyword
class RoamKeywordTest(TestCase):
    """Tests for roam.models.Keyword, driven by this repository's README.org."""

    def setUp(self):
        # super().setUp()
        readme_path = str(settings.BASE_DIR / "./README.org")
        self.native = parse_file(readme_path)
        self.expected_path = readme_path
- test create_from_arroyo
  - parse this document and see if we can get a list of Keyword out of it
  - validate that the ROAM_ALLOWED_KEYWORDS filter works
def test_cfa(self):
    """create_from_arroyo returns every stored Keyword, linked to its File."""
    source_file = File.create_from_arroyo(self.native)
    created = Keyword.create_from_arroyo(self.native)
    self.assertEqual(len(created), len(Keyword.objects.all()))

    first = created[0]
    self.assertEqual(first.keyword, "ARCOLOGY_KEY")
    self.assertEqual(first.value, "arcology/django")
    self.assertEqual(first.path, source_file)
    self.assertEqual(first.path.path, self.expected_path)
ROAM_ALLOWED_KEYWORDS
filtering
def test_cfa_allowed_keywords(self):
    """FILETAGS is not among the keywords create_from_arroyo persists."""
    File.create_from_arroyo(self.native)
    persisted_names = [kw.keyword for kw in Keyword.create_from_arroyo(self.native)]
    self.assertNotIn("FILETAGS", persisted_names)
Keyword.Meta uniqueness testing
def test_uniqueness(self):
    """Saving a second identical (path, keyword, value) row is rejected."""
    file = File.create_from_arroyo(self.native)

    def build(value):
        return Keyword(path=file, keyword="WHICH_ONE", value=value)

    first = build("THE_FIRST")
    second = build("THE_SECOND")
    duplicate_of_first = build("THE_FIRST")

    # distinct values under the same keyword are allowed
    first.save()
    second.save()
    # an identical triple violates the uniqueness check
    with self.assertRaises(IntegrityError):
        duplicate_of_first.save()
Heading
class Heading(EMOM('heading'), models.Model):
    """A heading that carries an ID, keyed by that ID and owned by a File."""

    node_id = models.CharField(max_length=256, primary_key=True)
    level = models.IntegerField()
    title = models.TextField()
    path = models.ForeignKey(File, on_delete=models.CASCADE, db_column="path")

    # reverse accessor created therein
    inbound_headings = models.ManyToManyField(
        "Heading",
        through="Link",
        related_name="outbound_headings",
        through_fields=("dest_heading", "source_heading"),
    )

    def to_url(self) -> str:
        """Return the URL of this heading.

        The URL of the first page attached to the heading's file is used; for
        any level other than 0 the node ID is appended as a fragment.
        """
        # NOTE(review): page_set comes from a model outside this file and
        # first() may return None if no page exists — confirm with callers.
        page_url = self.path.page_set.first().to_url()
        if self.level != 0:
            return f"{page_url}#{self.node_id}"
        return page_url

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> List[Heading]:
        """Persist every heading of *doc* that has an ID and return the rows."""
        rows = []
        for heading in doc.headings or []:
            if heading.id is None:
                continue
            row, _created = cls.objects.get_or_create(
                node_id=heading.id,
                level=heading.level,
                title=heading.text,
                path=File(path=doc.path),
            )
            rows.append(row)
        return rows
roam.models.Heading
Testing
- synthesize a heading set
  - check inbound_headings and outbound_headings (do i even use this, should i even use this?)
  - check to_url; this is weird because it relies on File.page_set(), which is an implicit dependency on an Arcology model. 😳
from roam.models import Heading
class RoamHeadingTest(TestCase):
    """Tests for roam.models.Heading, driven by this repository's README.org."""

    def setUp(self):
        readme_path = str(settings.BASE_DIR / "./README.org")
        self.native = parse_file(readme_path)
        self.expected_path = readme_path
        # Headings reference their File, so it is persisted up front.
        self.file = File.create_from_arroyo(self.native)
Test create_from_arroyo
on this document:
def test_create_create_from_arroyo(self):
    """create_from_arroyo returns one Heading per ID-bearing heading."""
    # only headings with IDs will be created; this may need to be changed later on
    expected_count = 2
    created = Heading.create_from_arroyo(self.native)
    self.assertEqual(len(created), expected_count)
Test that file relationships are created:
def test_heading_relationships(self):
    """Every created Heading points at the File persisted in setUp."""
    headings = Heading.create_from_arroyo(self.native)
    for heading in headings:
        # assertEqual, not the assertEquals alias removed in Python 3.12
        self.assertEqual(heading.path, self.file)
Test that object internals are set properly:
def test_object_internals(self):
    """The first created Heading carries the README's level, ID and title."""
    headings = Heading.create_from_arroyo(self.native)
    first = headings[0]
    # assertEqual, not the assertEquals alias removed in Python 3.12
    self.assertEqual(first.level, 0)
    self.assertEqual(first.node_id, "arcology/django/readme")
    self.assertEqual(first.title, "The Arcology Project: Django Edition")
I need to create a Page and a Site to test this … weird concern-separation happening here.
def test_to_url(self):
    """Placeholder for Heading.to_url coverage.

    Reported as skipped rather than passed, so the missing coverage stays
    visible in test output instead of counting as a success.
    """
    self.skipTest("Heading.to_url needs a Page and a Site fixture; not implemented")
    # raise Exception("Not implemented!")
Properties
class HeadingProperty(EMOM('heading_property'), models.Model):
    """A keyword/value property attached to a Heading."""

    heading = models.ForeignKey(
        Heading,
        on_delete=models.CASCADE,
        db_column="node_id",
    )
    keyword = models.CharField(max_length=256)
    value = models.CharField(max_length=256)

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> List[HeadingProperty]:
        """Persist the properties of every ID-bearing heading in *doc*.

        Returns the HeadingProperty rows (fetched or newly created).

        Raises:
            Heading.DoesNotExist: if a heading that has properties has not
                been persisted yet.
        """
        created = []
        for heading in doc.headings or []:
            properties = heading.properties or {}
            # Headings without an ID or without properties need no lookup.
            if heading.id is None or not properties:
                continue
            # Fetch the owning Heading once per heading, not once per property.
            heading_obj = Heading.objects.get(node_id=heading.id)
            for key, value in properties.items():
                row, _created = cls.objects.get_or_create(
                    heading=heading_obj,
                    keyword=key,
                    value=value,
                )
                created.append(row)
        return created
NEXT roam.models.HeadingProperty
Testing
from roam.models import HeadingProperty
class RoamHeadingPropertyTest(TestCase):
    """Tests for roam.models.HeadingProperty, driven by README.org."""

    def setUp(self):
        # super().setUp()
        readme_path = str(settings.BASE_DIR / "./README.org")
        self.native = parse_file(readme_path)
        self.expected_path = readme_path
- test
create_from_arroyo
, parse this doc, create file and headings, and properties, validate properties are populated properly - including top-level file-properties (this will fail right now, i think)
def test_cfa(self):
    """Properties of the README's headings are persisted and queryable."""
    _file = File.create_from_arroyo(self.native)
    headings = Heading.create_from_arroyo(self.native)
    props = HeadingProperty.create_from_arroyo(self.native)

    # This will raise because level 0 file properties are not persisted; I
    # need to fix it in arroyo_rs. It's not included in the test, but I'd
    # like to be able to once I fix arroyo_rs.
    # self.assertEqual(len(props), 2)

    # Fetch a level 0 heading and test it. The property check below will
    # also fail and is not included in the test.
    l0_heading = next(filter(lambda h: h.level == 0, headings))
    self.assertEqual(l0_heading.level, 0)
    l0h_properties = l0_heading.headingproperty_set.all()
    # self.assertNotEqual(len(l0h_properties), 0)

    # Level 1 headings will be properly persisted; let's see if the ID for
    # "Rough Timeline and Task List" is populated.
    l1_heading = next(filter(lambda h: h.title == "Rough Timeline and Task List", headings))
    l1h_properties = l1_heading.headingproperty_set.all()
    # assertEqual, not the assertEquals alias removed in Python 3.12
    self.assertEqual(len(l1h_properties), 1)
    self.assertEqual(l1h_properties[0].keyword, "ID")
    self.assertEqual(l1h_properties[0].value, "20240205T101753.548048")
NEXT [#A] fix file-level property drawer extraction in arroyo_rs, enable level 0 tests
Tag
class Tag(EMOM('tag'), models.Model):
    """A tag attached to a Heading; unique per (heading, tag) pair."""

    class Meta:
        unique_together = (("heading_id", "tag"),)

    heading = models.ForeignKey(
        Heading,
        on_delete=models.CASCADE,
        db_column="node_id",
    )
    tag = models.CharField(max_length=256)

    def related_pages(self) -> Set['arcology.models.Page']:
        """Return the distinct pages that carry this tag's name."""
        return set(self.__class__.pages_by_name(self.tag))

    @classmethod
    def weighted_pages_by_name(cls, tag_name):
        """Map each page tagged *tag_name* to the number of times it occurs."""
        pages = cls.pages_by_name(tag_name)
        ret = dict()
        for page in pages:
            ret[page] = ret.get(page, 0) + 1
        return ret

    @classmethod
    def pages_by_name(cls, tag_name: str) -> List['arcology.models.Page']:
        """Return one page per Tag row named *tag_name*.

        The page is the first one attached to the file of the tagged heading;
        the same page appears once for every matching Tag row.
        """
        return [
            tag_obj.heading.path.page_set.first()
            for tag_obj in cls.objects.filter(tag=tag_name).distinct()
        ]

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> List[Tag]:
        """Persist the tags of every ID-bearing heading in *doc*.

        Returns the Tag rows (fetched or newly created).

        Raises:
            Heading.DoesNotExist: if a heading that has tags has not been
                persisted yet.
        """
        created = []
        for heading in doc.headings or []:
            tags = heading.tags or []
            # Headings without an ID or without tags need no lookup.
            if heading.id is None or not tags:
                continue
            # Fetch the owning Heading once per heading, not once per tag.
            heading_obj = Heading.objects.get(node_id=heading.id)
            for tag in tags:
                row, _created = cls.objects.get_or_create(heading=heading_obj, tag=tag)
                created.append(row)
        return created
NEXT Testing
-
test
create_from_arroyo
- parse this doc, create file and headings, and tags, check some tags from this document
- including top-level file-properties and filetags
- check and audit queries, consider making more instance methods
Reference
class Reference(EMOM('reference'), models.Model):
    """A ref string attached to a Heading."""

    heading = models.ForeignKey(
        Heading,
        on_delete=models.CASCADE,
        db_column="node_id",
    )
    ref = models.CharField(max_length=256)

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> List[Reference]:
        """Persist the refs of every ID-bearing heading in *doc*.

        Returns the Reference rows (fetched or newly created).

        Raises:
            Heading.DoesNotExist: if a heading that has refs has not been
                persisted yet.
        """
        created = []
        for heading in doc.headings or []:
            refs = heading.refs or []
            # Headings without an ID or without refs need no lookup.
            if heading.id is None or not refs:
                continue
            # Fetch the owning Heading once per heading, not once per ref.
            heading_obj = Heading.objects.get(node_id=heading.id)
            for ref in refs:
                row, _created = cls.objects.get_or_create(heading=heading_obj, ref=ref)
                created.append(row)
        return created
NEXT Testing
-
test
create_from_arroyo
- parse this doc, create file and headings, and tags, check some refs from this document
- including top-level file properties refs
- check and audit queries, consider making more instance methods
Link
class Link(EMOM('link'), models.Model):
    """A link from a heading in one file to a (possibly unindexed) heading.

    ``dest_heading`` is nullable and has no database constraint, so a Link
    may point at a node ID for which no Heading row exists.
    """

    title = models.TextField()
    source_file = models.ForeignKey(
        File,
        related_name="outbound_links",
        on_delete=models.CASCADE,
    )
    source_heading = models.ForeignKey(
        Heading,
        related_name="outbound_links",
        on_delete=models.CASCADE,
        to_field="node_id",
    )
    dest_heading = models.ForeignKey(
        Heading,
        related_name="inbound_links",
        on_delete=models.DO_NOTHING,
        db_constraint=False,
        null=True,
        default=None,
        to_field="node_id",
    )

    def __repr__(self) -> str:
        return f"<Link (from: {self.source_heading_id}, to: {self.dest_heading_id}, text: {self.title})>"

    def __str__(self) -> str:
        return self.__repr__()

    def to_backlink_html(self) -> str:
        """Render an anchor pointing back at the heading this link comes from.

        Falls back to a "dead-link" anchor when the source heading cannot be
        loaded.
        """
        # Imported locally so this fix does not depend on the module's
        # import block. iri_to_uri is the function behind Django's
        # ``iriencode`` template filter, which the original f-string tried to
        # apply with template syntax ({self.title|iriencode}); in Python that
        # is a bitwise OR against an undefined name and raised NameError.
        from django.utils.encoding import iri_to_uri

        # NOTE(review): titles and URLs are interpolated into the markup
        # without HTML escaping — confirm they are trusted or escaped by the
        # caller.
        try:
            h = self.source_heading
            page = h.path.page_set.first()
            url = h.to_url()
            title = page.title
            return f'''<a class="internal" href="{url}">{title}</a>'''
        except Heading.DoesNotExist:
            logger.info(f"{self} does not have dest heading.")
            return f'''<a class="dead-link" href="/404?text={iri_to_uri(self.title)}">{self.title}</a>'''

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> List[Link]:
        """Persist every ``id``-protocol link in *doc* and return the rows.

        Each link is attributed to the nearest preceding heading that has an
        ID. Links with any other protocol are logged and skipped, as are
        links that appear before any ID-bearing heading.

        Raises:
            File.DoesNotExist / Heading.DoesNotExist: if the source file or
                source heading has not been persisted yet.
        """
        ret = []
        # ID of the most recent heading that carries one; links under ID-less
        # headings are attributed to it.
        current_id = None
        # Fetched on first use so documents without id-links cost no query.
        source_file = None

        for heading in doc.headings or []:
            if heading.id is not None:
                current_id = heading.id

            for link in heading.links or []:
                if link.to_proto != "id":
                    # create a pseudo-link or a link that can be resolved using Reference?
                    logger.warning(f"Skipping non-id link {link}")
                    continue
                if current_id is None:
                    # No ID-bearing heading seen yet: there is nothing to
                    # attach the link to (this used to raise IndexError).
                    logger.warning(f"Skipping link without a source heading {link}")
                    continue

                logger.debug(f"link: {link}")
                logger.debug(f"dest: {link.to}")

                if source_file is None:
                    source_file = File.objects.get(path=doc.path)

                obj = cls(title=(link.text or ""))
                obj.source_file = source_file
                obj.source_heading = Heading.objects.get(node_id=current_id)
                # fudge this since we may be linking to Headings which are not yet indexed
                obj.dest_heading_id = link.to

                logger.warning(f"save maybe {obj}")
                obj.save()
                ret.append(obj)
        return ret
NEXT Testing
-
test
create_from_arroyo
- parse this doc, create file and headings, and validate the behavior of links through ones on this page
- internal ID links, external HTTP/s, shell commands and other Emacs clickables
- check and audit queries, consider making more instance methods
NEXT Roam Heading Aliases
Parsing and Persisting an org-mode document
roam.core.persist_one_file
takes an arroyo_rs Native Org Parser document and does all the magic to store it in to the DB. Whether it needs to be updated is checked nearby. (This is in the Arcology ingest_files Command but should maybe be moved out to here; that thing is a bit of a ball of mud… dorodango reigns.)
it would be nice to put these in a single class wrapping the native doc…
from typing import Optional
from django.conf import settings
from django.db import transaction
from django.utils.module_loading import import_string
import arroyo.arroyo_rs as native
from roam.models import File, Keyword
import logging
logger = logging.getLogger(__name__)
# logger.setLevel(logging.DEBUG)
@transaction.atomic
def persist_one_file(doc: native.Document) -> File | None:
    """Store a parsed document and, when it is published, its roam relations.

    The File and its Keywords are always written. The extractors listed in
    settings.ARCOLOGY_EXTRACTORS run only when the document has at least one
    ARCOLOGY_KEY keyword. Everything happens inside one transaction.

    Returns the File row for *doc*.
    """
    # unconditionally import the keywords in to the database so that
    # other Arroyo generators can work with unpublished documents
    model_doc = File.create_from_arroyo(doc)
    Keyword.create_from_arroyo(doc)

    # a document with an ARCOLOGY_KEY keyword is an Arcology-published one
    is_published = len(list(doc.collect_keywords("ARCOLOGY_KEY"))) > 0
    if not is_published:
        logger.debug(f"{doc.path} is not published, skipping roam relations.\n")
        return model_doc

    # published pages need every create_from_arroyo builder run and persisted
    for dotted_path in settings.ARCOLOGY_EXTRACTORS:
        logger.debug(f"running {dotted_path}.create_from_arroyo")
        extractor = import_string(dotted_path)
        extractor.create_from_arroyo(doc)
    model_doc.refresh_from_db()
    return model_doc
@transaction.atomic
def arroyo_persist_one_file(doc: native.Document):
    """Run every extractor in settings.ARROYO_EXTRACTORS against *doc*.

    ARROYO_EXTRACTORS maps a label to a dotted import path; each imported
    object has its ``create_from_arroyo`` called inside one transaction.
    """
    for label, dotted_path in settings.ARROYO_EXTRACTORS.items():
        logger.debug(f"{label}: {doc.path} time to go axis")
        extractor = import_string(dotted_path)
        extractor.create_from_arroyo(doc)
This should_file_persist
function is from and for the Arcology ingest_files Command and that's okay. It returns two boolean values, is_existing
and need_update
which are used to branch the behavior of that command.
def should_file_persist(path: str) -> tuple[bool, bool, Optional[native.Document]]:
    """Decide whether the file at *path* must be (re-)persisted.

    Returns a tuple of:
      - boolean: is this in the DB
      - boolean: is this in need of updating
      - the parsed document when an update is needed, otherwise None

    A document that cannot be parsed yields ``(False, False, None)``.
    """
    try:
        ifu = is_file_updated(path)
        if ifu is None:  # there is no doc
            return (False, True, parse_doc(path))
        if ifu is True:  # doc is existing but out of date; delete and parse
            return (True, True, parse_doc(path))
        # doc is existing and unaltered
        return (True, False, None)
    except native.InvalidDocError:  # doc can't be parsed
        return (False, False, None)
is_file_updated
is used in the command to compare the hash of the file on-disk to the one stored in the DB by File.create_from_arroyo
. It returns True if the file needs to be re-parsed, False if it does not, and None if it has never been indexed before. WTB a Mu datatype.
def is_file_updated(path: str) -> Optional[bool]:
    """Compare the stored digest for *path* with the file on disk.

    Returns True when the file changed, False when it did not, and None when
    no File row exists for *path*.
    """
    try:
        existing = File.objects.get(path=path)
        return True if existing.hash_updated() else False
    except File.DoesNotExist:
        return None
Document parsing is memoized using functools.cache because each changed document is parsed twice: once to get the core data models and once to run The Arroyo Generators' indexers. It's quite possible that I'll need to feed the file hash in to this later on if the ingest_files command is called repeatedly in quick succession from the Syncthing client below.
from functools import cache
#XXX it's possible that this needs a cache-key with the doc hash later on.
@cache
def parse_doc(path: str) -> native.Document:
    """Parse the org file at *path*, memoizing the result per path.

    The cache is keyed on the path only, so a file that changes on disk keeps
    returning its first parse for the lifetime of the process.
    """
    document = native.parse_file(path)
    return document
NEXT Testing
Test is_file_updated
and should_file_persist
, these are scary.
Test parse_doc and the cache behavior somehow, that XXX
above…
NEXT split up the migration
# Generated by Django 4.2.6 on 2023-12-17 20:50
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """Squashed migration for the roam app.

    Stands in for the five migrations listed in ``replaces`` and creates the
    File, Keyword, Link, Heading, Reference and Tag tables.
    """

    # Migrations this squashed migration replaces.
    replaces = [
        ("roam", "0001_initial"),
        ("roam", "0002_link_file_inbound_files_heading_inbound_headings"),
        ("roam", "0003_remove_link_dest_file_remove_link_dest_heading_and_more"),
        ("roam", "0004_remove_file_inbound_files_remove_heading_id_and_more"),
        ("roam", "0005_alter_link_dest_heading"),
    ]

    dependencies = []

    operations = [
        # File: keyed by its path.
        migrations.CreateModel(
            name="File",
            fields=[
                (
                    "path",
                    models.CharField(max_length=512, primary_key=True, serialize=False),
                ),
                ("digest", models.CharField(max_length=512)),
            ],
        ),
        # Keyword: keyword/value rows per File, unique per triple.
        migrations.CreateModel(
            name="Keyword",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("keyword", models.CharField(max_length=512)),
                ("value", models.CharField(max_length=512)),
                (
                    "path",
                    models.ForeignKey(
                        db_column="path",
                        on_delete=django.db.models.deletion.CASCADE,
                        to="roam.file",
                    ),
                ),
            ],
            options={
                "unique_together": {("path", "keyword", "value")},
            },
        ),
        # Link is created before Heading with file foreign keys only; its
        # heading foreign keys are added by the AddField operations below.
        migrations.CreateModel(
            name="Link",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("title", models.TextField()),
                (
                    "dest_file",
                    models.ForeignKey(
                        default="",
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="inbound_links",
                        to="roam.file",
                    ),
                ),
                (
                    "source_file",
                    models.ForeignKey(
                        default="",
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="outbound_links",
                        to="roam.file",
                    ),
                ),
            ],
        ),
        # Heading: keyed by node_id, with a self-referential many-to-many
        # routed through Link.
        migrations.CreateModel(
            name="Heading",
            fields=[
                (
                    "node_id",
                    models.CharField(max_length=256, primary_key=True, serialize=False),
                ),
                ("level", models.IntegerField()),
                ("title", models.TextField()),
                (
                    "path",
                    models.ForeignKey(
                        db_column="path",
                        on_delete=django.db.models.deletion.CASCADE,
                        to="roam.file",
                    ),
                ),
                (
                    "inbound_headings",
                    models.ManyToManyField(
                        related_name="outbound_headings",
                        through="roam.Link",
                        to="roam.heading",
                    ),
                ),
            ],
        ),
        migrations.AddField(
            model_name="link",
            name="source_heading",
            field=models.ForeignKey(
                default="",
                on_delete=django.db.models.deletion.CASCADE,
                related_name="outbound_links",
                to="roam.heading",
            ),
            preserve_default=False,
        ),
        # dest_file, created with Link above, is dropped again here.
        migrations.RemoveField(
            model_name="link",
            name="dest_file",
        ),
        migrations.CreateModel(
            name="Reference",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("ref", models.CharField(max_length=256)),
                (
                    "heading",
                    models.ForeignKey(
                        db_column="node_id",
                        on_delete=django.db.models.deletion.CASCADE,
                        to="roam.heading",
                    ),
                ),
            ],
        ),
        migrations.CreateModel(
            name="Tag",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("tag", models.CharField(max_length=256)),
                (
                    "heading",
                    models.ForeignKey(
                        db_column="node_id",
                        on_delete=django.db.models.deletion.CASCADE,
                        to="roam.heading",
                    ),
                ),
            ],
            options={
                "unique_together": {("heading_id", "tag")},
            },
        ),
        # dest_heading is nullable and carries no database constraint.
        migrations.AddField(
            model_name="link",
            name="dest_heading",
            field=models.ForeignKey(
                db_constraint=False,
                default=None,
                null=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name="inbound_links",
                to="roam.heading",
            ),
        ),
    ]
NEXT tests
NEXT create a test harness for create_from_arroyo
tests…
Admin
from django.contrib import admin
from django.contrib import admin
import roam.models
class KeywordInline(admin.TabularInline):
    """Tabular inline for Keyword rows."""

    model = roam.models.Keyword
class HeadingInline(admin.TabularInline):
    """Tabular inline for Heading rows."""

    model = roam.models.Heading
class LinkInline(admin.TabularInline):
    """Tabular inline for Link rows, attached through ``source_heading``."""

    model = roam.models.Link
    # Link has more than one foreign key to Heading, so the one to use for
    # the inline is named explicitly.
    fk_name = "source_heading"
class PropertyInline(admin.TabularInline):
    """Tabular inline for HeadingProperty rows."""

    model = roam.models.HeadingProperty
class TagInline(admin.TabularInline):
    """Tabular inline for Tag rows."""

    model = roam.models.Tag
class ReferenceInline(admin.TabularInline):
    """Tabular inline for Reference rows."""

    model = roam.models.Reference
@admin.register(roam.models.HeadingProperty)
class PropertyAdmin(admin.ModelAdmin):
    """Admin for HeadingProperty; lists heading, keyword and value."""

    list_display = ["heading", "keyword", "value"]
@admin.register(roam.models.Keyword)
class KeywordAdmin(admin.ModelAdmin):
    """Admin for Keyword; lists path, keyword and value."""

    list_display = ["path", "keyword", "value"]
@admin.register(roam.models.Reference)
class ReferenceAdmin(admin.ModelAdmin):
    """Admin for Reference; lists heading and ref."""

    list_display = ["heading", "ref"]
@admin.register(roam.models.Tag)
class TagAdmin(admin.ModelAdmin):
    """Admin for Tag; lists heading and tag."""

    list_display = ["heading", "tag"]
@admin.register(roam.models.File)
class FileAdmin(admin.ModelAdmin):
    """Admin for File, with its Keyword and Heading rows shown inline."""

    inlines = [
        KeywordInline,
        HeadingInline,
    ]
@admin.register(roam.models.Heading)
class HeadingAdmin(admin.ModelAdmin):
    """Admin for Heading, with tags, refs, properties and links inline."""

    list_display = ["node_id", "path"]
    inlines = [
        TagInline,
        ReferenceInline,
        PropertyInline,
        LinkInline,
    ]
# Link is registered with the default ModelAdmin (no custom options).
admin.site.register(roam.models.Link)
Views
there probably aren't any but for now:
from django.shortcuts import render
# Create your views here.
The Rest
from django.apps import AppConfig
class RoamConfig(AppConfig):
    """App configuration for the ``roam`` Django app."""

    # Models without an explicit primary key get a BigAutoField.
    default_auto_field = "django.db.models.BigAutoField"
    name = "roam"