983 lines
31 KiB
Org Mode
983 lines
31 KiB
Org Mode
# -*- org-src-preserve-indentation: t; -*-
|
|
:PROPERTIES:
|
|
:ID: arcology/django/roam
|
|
:END:
|
|
#+TITLE: Arcology Roam Models
|
|
#+filetags: :Project:
|
|
|
|
#+ARCOLOGY_KEY: arcology/roam
|
|
|
|
#+BEGIN_SRC python :tangle roam/__init__.py
|
|
#+END_SRC
|
|
|
|
at the top here, describe the class layout and the overall usage of this module.
|
|
|
|
* Org-Roam Caching Models
|
|
:PROPERTIES:
|
|
:ID: 20240204T234111.701754
|
|
:ROAM_ALIASES: "Arcology Org-Roam Caching Models"
|
|
:END:
|
|
|
|
#+BEGIN_SRC python :tangle roam/models.py
|
|
from __future__ import annotations
|
|
import hashlib
|
|
from typing import List, Set
|
|
|
|
from django.db import models
|
|
from django.conf import settings
|
|
from django.utils.module_loading import import_string
|
|
from django_prometheus.models import ExportModelOperationsMixin as EMOM
|
|
|
|
import arroyo.arroyo_rs as native
|
|
|
|
import logging
|
|
|
|
logger = logging.getLogger(__name__)
|
|
#+END_SRC
|
|
|
|
** File
|
|
|
|
#+BEGIN_SRC python :tangle roam/models.py
|
|
def calculate_hash(path: str) -> str:
    """Return the hex-encoded SHA-256 digest of the file at ``path``.

    The file is read in fixed-size chunks so memory use stays bounded
    no matter how large the file is.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # Stream the content rather than loading it with a single read().
        while chunk := f.read(65536):
            digest.update(chunk)

    return digest.hexdigest()
|
|
|
|
|
|
class File(EMOM('file'), models.Model):
    """An org file tracked in the cache, keyed by its path.

    ``digest`` holds the SHA-256 hex digest computed by ``calculate_hash``
    when the row was last written; ``hash_updated`` compares it against the
    file currently on disk.
    """

    path = models.CharField(max_length=512, primary_key=True)
    digest = models.CharField(max_length=512)

    # inbound_files = models.ManyToManyField(
    #     "File",
    #     through="Link",
    #     related_name="outbound_files",
    #     through_fields=("dest_file", "source_file")
    # )

    def hash_updated(self) -> bool:
        """Return True when the file on disk no longer matches ``digest``.

        Raises:
            OSError: if the file at ``self.path`` cannot be read.
        """
        file_hash = calculate_hash(self.path)
        logger.debug(f"old: {self.digest} new: {file_hash}")
        return file_hash != self.digest

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> File:
        """Create or refresh the row for ``doc.path`` and return it.

        The path is the primary key, so only the path is used for the lookup
        and the digest goes in ``defaults``.  Looking up by path *and* digest
        (as ``get_or_create`` did) misses an existing row whose content has
        changed and then fails with an IntegrityError on insert.

        NOTE(review): rows related to an already-existing File are not
        touched here; callers that re-ingest a changed file remain
        responsible for clearing stale relations.
        """
        obj, _created = cls.objects.update_or_create(
            path=doc.path,
            defaults={"digest": calculate_hash(doc.path)},
        )
        return obj
|
|
#+END_SRC
|
|
|
|
*** =roam.models.File= Testing
|
|
|
|
#+begin_src python :tangle roam/tests.py
|
|
from django.test import TestCase
|
|
from django.db.utils import IntegrityError
|
|
|
|
from roam.models import File
|
|
from django.conf import settings
|
|
from arroyo import parse_file
|
|
import subprocess
|
|
#+end_src
|
|
|
|
#+begin_src python :tangle roam/tests.py
|
|
from roam.models import File
|
|
class RoamFileTest(TestCase):
    def setUp(self):
        # super().setUp()
        self.expected_path = str(settings.BASE_DIR / "./README.org")
        self.native = parse_file(self.expected_path)
        # Compute the reference digest with the external ``sha256sum`` tool so
        # it is independent of calculate_hash.  The command is passed as an
        # argument list (no shell), so paths containing spaces or shell
        # metacharacters are handled correctly.
        output = subprocess.check_output(["sha256sum", self.expected_path])
        # sha256sum prints "<digest>  <path>"; keep only the digest.
        self.expected_hash = output.decode("UTF-8").split()[0]
|
|
#+end_src
|
|
|
|
test =create_from_arroyo=, parse this document and see if we can get a =File= out of it lul
|
|
|
|
#+begin_src python :tangle roam/tests.py
|
|
def test_cfa(self):
    """Persisting the parsed README yields a File row with its path and digest."""
    File.create_from_arroyo(self.native)
    stored = File.objects.first()

    # the row read back from the DB must describe the README
    self.assertEqual(stored.path, self.expected_path)
    self.assertEqual(stored.digest, self.expected_hash)
|
|
#+end_src
|
|
|
|
test the =hash_updated= function, synthesize a File object and check the behavior of =hash_updated= and that =calculate_hash= works
|
|
|
|
#+begin_src python :tangle roam/tests.py
|
|
def test_hash_updated(self):
    """hash_updated() reports a change only while the stored digest is stale."""
    # Unsaved instance whose digest cannot match the file on disk.
    stale = File(path=self.expected_path, digest="12345")
    self.assertNotEqual(stale.digest, self.expected_hash)
    self.assertTrue(stale.hash_updated())

    # With the real digest in place no update is reported.
    stale.digest = self.expected_hash
    self.assertFalse(stale.hash_updated())
|
|
#+end_src
|
|
|
|
** Keyword
|
|
|
|
#+BEGIN_SRC python :tangle roam/models.py
|
|
class Keyword(EMOM('keyword'), models.Model):
    """A keyword/value pair extracted from a document and tied to its File."""

    class Meta:
        # XXX: how do i get out of this? i think i just have to assume
        # that there will be duplicates, and these cannot be unique, have
        # to be filter()'d for
        unique_together = (("path", "keyword", "value"),)

    path = models.ForeignKey(File, on_delete=models.CASCADE, db_column="path")
    keyword = models.CharField(max_length=512)
    value = models.CharField(max_length=512)

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> List[Keyword]:
        """Fetch or create a row for every allowed keyword in ``doc``.

        Keywords whose name is not listed in
        ``settings.ROAM_ALLOWED_KEYWORDS`` are skipped.  Returns the rows in
        the order the keywords appear in ``doc.keywords``.
        """
        rows: List[Keyword] = []
        for kw in doc.keywords:
            if kw.keyword not in settings.ROAM_ALLOWED_KEYWORDS:
                continue
            # An unsaved File carrying only the primary key is enough to
            # express the foreign key.
            row, _created = cls.objects.get_or_create(
                path=File(path=kw.file),
                keyword=kw.keyword,
                value=kw.value,
            )
            rows.append(row)
        return rows
|
|
#+END_SRC
|
|
|
|
*** =roam.models.Keyword= Testing
|
|
|
|
- test the =path= =ForeignKey=
|
|
- test common queries from elsewhere in the codebase (and probably slurp those in to instance methods along the way)
|
|
|
|
#+begin_src python :tangle roam/tests.py
|
|
from roam.models import Keyword
|
|
|
|
class RoamKeywordTest(TestCase):
    def setUp(self):
        # super().setUp()
        # The project README is the fixture document for these tests.
        readme = str(settings.BASE_DIR / "./README.org")
        self.native = parse_file(readme)
        self.expected_path = readme
|
|
#+end_src
|
|
|
|
- test =create_from_arroyo=
|
|
- parse this document and see if we can get a list of =Keyword= out of it lul
|
|
- validate that the =ROAM_ALLOWED_KEYWORDS= filter works
|
|
|
|
#+begin_src python :tangle roam/tests.py
|
|
def test_cfa(self):
    """Keywords created from the README are stored and linked to its File."""
    readme_file = File.create_from_arroyo(self.native)
    kws = Keyword.create_from_arroyo(self.native)

    # every returned row is persisted, and nothing else is
    self.assertEqual(len(kws), len(Keyword.objects.all()))

    first = kws[0]
    self.assertEqual(first.keyword, "ARCOLOGY_KEY")
    self.assertEqual(first.value, "arcology/django")
    self.assertEqual(first.path, readme_file)
    self.assertEqual(first.path.path, self.expected_path)
|
|
#+end_src
|
|
|
|
- =ROAM_ALLOWED_KEYWORDS= filtering
|
|
|
|
#+begin_src python :tangle roam/tests.py
|
|
def test_cfa_allowed_keywords(self):
    """FILETAGS must be filtered out of the created keywords."""
    File.create_from_arroyo(self.native)
    names = [kw.keyword for kw in Keyword.create_from_arroyo(self.native)]
    self.assertNotIn("FILETAGS", names)
|
|
#+end_src
|
|
|
|
- =Keyword.Meta.unique_together= (uniqueness) testing
|
|
|
|
#+begin_src python :tangle roam/tests.py
|
|
def test_uniqueness(self):
    """A repeated (path, keyword, value) triple is rejected by the database."""
    file = File.create_from_arroyo(self.native)

    def build(value):
        # all three rows share the file and the keyword; only the value varies
        return Keyword(path=file, keyword="WHICH_ONE", value=value)

    first = build("THE_FIRST")
    second = build("THE_SECOND")
    duplicate = build("THE_FIRST")

    first.save()  # distinct value: accepted
    second.save()  # distinct value: accepted
    with self.assertRaises(IntegrityError):
        duplicate.save()  # same triple as `first`: violates the uniqueness check
|
|
#+end_src
|
|
|
|
** Heading
|
|
|
|
#+BEGIN_SRC python :tangle roam/models.py
|
|
class Heading(EMOM('heading'), models.Model):
    """A heading of a File, identified by its node ID."""

    node_id = models.CharField(max_length=256, primary_key=True)
    level = models.IntegerField()
    title = models.TextField()

    path = models.ForeignKey(File, on_delete=models.CASCADE, db_column="path")

    # reverse accessor created therein
    inbound_headings = models.ManyToManyField(
        "Heading",
        through="Link",
        related_name="outbound_headings",
        through_fields=("dest_heading", "source_heading"),
    )

    def to_url(self) -> str:
        """Return the URL of this heading.

        A level-0 heading maps to its page's URL; any other level gets the
        node ID appended as a fragment.

        NOTE(review): ``page_set.first()`` may return None when the file has
        no page, in which case this raises AttributeError -- confirm callers
        only use it for published files.
        """
        page_url = self.path.page_set.first().to_url()
        if self.level == 0:
            return page_url
        return f"{page_url}#{self.node_id}"

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> List[Heading]:
        """Fetch or create a row for every heading of ``doc`` that has an ID.

        Headings without an ID are skipped.

        NOTE(review): the lookup includes level/title/path next to the
        primary key, so an existing node ID with changed attributes will not
        match -- verify callers always delete the File first.
        """
        rows: List[Heading] = []
        for heading in doc.headings or []:
            if heading.id is None:
                continue
            row, _created = cls.objects.get_or_create(
                node_id=heading.id,
                level=heading.level,
                title=heading.text,
                path=File(path=doc.path),
            )
            rows.append(row)
        return rows
|
|
#+END_SRC
|
|
|
|
*** =roam.models.Heading= Testing
|
|
|
|
- synthesize a heading set
|
|
- check =inbound_headings= and =outbound_headings= (do i even use this, should i even use this?)
|
|
- check =to_url=, this is weird because it relies on =File.page_set()= which is an implicit dependency on an Arcology model. 😳
|
|
|
|
|
|
#+begin_src python :tangle roam/tests.py
|
|
from roam.models import Heading
|
|
|
|
class RoamHeadingTest(TestCase):
    def setUp(self):
        # Parse the README once and persist its File row, which the Heading
        # rows created in the tests point at.
        readme = str(settings.BASE_DIR / "./README.org")
        self.native = parse_file(readme)
        self.expected_path = readme
        self.file = File.create_from_arroyo(self.native)
|
|
#+end_src
|
|
|
|
Test =create_from_arroyo= on this document:
|
|
|
|
#+begin_src python :tangle roam/tests.py
|
|
def test_create_create_from_arroyo(self):
    """The README yields exactly as many Heading rows as it has ID'd headings."""
    # only headings with IDs will be created; this may need to be changed later on
    expected_count = 2
    created = Heading.create_from_arroyo(self.native)
    self.assertEqual(len(created), expected_count)
|
|
#+end_src
|
|
|
|
Test that file relationships are created:
|
|
|
|
#+begin_src python :tangle roam/tests.py
|
|
def test_heading_relationships(self):
    """Every created Heading points back at the File persisted in setUp."""
    headings = Heading.create_from_arroyo(self.native)
    for heading in headings:
        # assertEqual, not the deprecated assertEquals alias (removed in 3.12)
        self.assertEqual(heading.path, self.file)
|
|
#+end_src
|
|
|
|
Test that object internals are set properly:
|
|
|
|
#+begin_src python :tangle roam/tests.py
|
|
def test_object_internals(self):
    """The first Heading of the README carries the document-level values."""
    headings = Heading.create_from_arroyo(self.native)
    first = headings[0]
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12)
    self.assertEqual(first.level, 0)
    self.assertEqual(first.node_id, "arcology/django/readme")
    self.assertEqual(first.title, "The Arcology Project: Django Edition")
|
|
#+end_src
|
|
|
|
I need to create a Page and a Site to test this ... weird concern-separation happening here.
|
|
|
|
#+begin_src python :tangle roam/tests.py
|
|
def test_to_url(self):
    """Placeholder for the Heading.to_url test."""
    # Report the gap as a skip instead of a silent pass, so the missing
    # coverage stays visible in test output.
    self.skipTest("Heading.to_url needs Page and Site fixtures; not implemented")
|
|
#+end_src
|
|
|
|
** Properties
|
|
|
|
#+begin_src python :tangle roam/models.py
|
|
class HeadingProperty(EMOM('heading_property'), models.Model):
    """A keyword/value property attached to a Heading."""

    heading = models.ForeignKey(
        Heading,
        on_delete=models.CASCADE,
        db_column="node_id",
    )
    keyword = models.CharField(max_length=256)
    value = models.CharField(max_length=256)

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> List[HeadingProperty]:
        """Fetch or create a row for every property of every ID'd heading.

        Headings without an ID, or without properties, are skipped.

        Raises:
            Heading.DoesNotExist: if a heading with properties has not been
                persisted yet.
        """
        rows: List[HeadingProperty] = []
        for heading in doc.headings or []:
            properties = heading.properties or {}
            if heading.id is None or not properties:
                continue
            # Look the owning Heading up once per heading, not once per property.
            heading_obj = Heading.objects.get(node_id=heading.id)
            for key, value in properties.items():
                row, _created = cls.objects.get_or_create(
                    heading=heading_obj,
                    keyword=key,
                    value=value,
                )
                rows.append(row)
        return rows
|
|
#+end_src
|
|
|
|
*** NEXT =roam.models.HeadingProperty= Testing
|
|
|
|
#+begin_src python :tangle roam/tests.py
|
|
from roam.models import HeadingProperty
|
|
|
|
class RoamHeadingPropertyTest(TestCase):
    def setUp(self):
        # super().setUp()
        # The project README is the fixture document for these tests.
        readme = str(settings.BASE_DIR / "./README.org")
        self.native = parse_file(readme)
        self.expected_path = readme
|
|
#+end_src
|
|
|
|
- test =create_from_arroyo=, parse this doc, create file and headings, and properties, validate properties are populated properly
|
|
- including top-level file-properties (this will fail right now, i think)
|
|
|
|
#+begin_src python :tangle roam/tests.py
|
|
def test_cfa(self):
    # Build the rows in dependency order: the File, then its Headings, then
    # the properties, which look their Heading up in the database.
    _file = File.create_from_arroyo(self.native)
    headings = Heading.create_from_arroyo(self.native)
    # The returned list is kept for the (currently disabled) count assertion.
    props = HeadingProperty.create_from_arroyo(self.native)
|
|
#+end_src
|
|
|
|
this will raise because level 0 file properties are not persisted, I need to fix it in [[id:20231023T115950.248543][arroyo_rs]]. It's not included in the test, but I'd like to be able to once I fix arroyo_rs.
|
|
|
|
#+begin_src python
|
|
# self.assertEquals(len(props), 2)
|
|
#+end_src
|
|
|
|
fetch a level 0 heading and test it. This will also fail and is not included in the test
|
|
|
|
#+begin_src python
|
|
l0_heading = next(filter(lambda h: h.level == 0, headings))
|
|
self.assertEquals(l0_heading.level, 0)
|
|
l0h_properties = l0_heading.headingproperty_set.all()
|
|
# self.assertNotEquals(len(l0h_properties), 0)
|
|
#+end_src
|
|
|
|
Level 1 headings will be properly persisted, let's see if the ID for [[id:20240205T101753.548048][Rough Timeline and Task List]] is populated.
|
|
|
|
#+begin_src python :tangle roam/tests.py
|
|
# Find the heading by title and check that its only stored property is its ID.
l1_heading = next(filter(lambda h: h.title == "Rough Timeline and Task List", headings))
l1h_properties = l1_heading.headingproperty_set.all()
# assertEqual, not the deprecated assertEquals alias (removed in 3.12)
self.assertEqual(len(l1h_properties), 1)

self.assertEqual(l1h_properties[0].keyword, "ID")
self.assertEqual(l1h_properties[0].value, "20240205T101753.548048")
|
|
#+end_src
|
|
|
|
**** NEXT [#A] fix file-level property drawer extraction in [[id:20231023T115950.248543][arroyo_rs]], enable level 0 tests
|
|
|
|
** Tag
|
|
|
|
#+BEGIN_SRC python :tangle roam/models.py
|
|
class Tag(EMOM('tag'), models.Model):
    """A tag attached to a Heading; each (heading, tag) pair is unique."""

    class Meta:
        unique_together = (("heading_id", "tag"),)

    heading = models.ForeignKey(
        Heading,
        on_delete=models.CASCADE,
        db_column="node_id",
    )
    tag = models.CharField(max_length=256)

    def related_pages(self) -> Set['arcology.models.Page']:
        """Return the distinct pages that carry this row's tag name."""
        return set(self.__class__.pages_by_name(self.tag))

    @classmethod
    def weighted_pages_by_name(cls, tag_name):
        """Map each page to the number of headings on it tagged ``tag_name``."""
        pages = cls.pages_by_name(tag_name)
        ret = dict()
        for page in pages:
            ret[page] = ret.get(page, 0) + 1
        return ret

    @classmethod
    def pages_by_name(cls, tag_name: str) -> List['arcology.models.Page']:
        """Return one page per Tag row named ``tag_name`` (repeats possible).

        NOTE(review): ``page_set.first()`` yields None for a file without a
        page, so the list may contain None -- confirm callers tolerate that.
        """
        return [
            tag_obj.heading.path.page_set.first()
            for tag_obj in cls.objects.filter(tag=tag_name).distinct()
        ]

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> List[Tag]:
        """Fetch or create a row for every tag of every ID'd heading.

        Headings without an ID, or without tags, are skipped.

        Raises:
            Heading.DoesNotExist: if a tagged heading has not been persisted
                yet.
        """
        rows: List[Tag] = []
        for heading in doc.headings or []:
            tags = heading.tags or []
            if heading.id is None or not tags:
                continue
            # Look the owning Heading up once per heading, not once per tag.
            heading_obj = Heading.objects.get(node_id=heading.id)
            for tag in tags:
                row, _created = cls.objects.get_or_create(heading=heading_obj, tag=tag)
                rows.append(row)
        return rows
|
|
#+END_SRC
|
|
|
|
*** NEXT Testing
|
|
|
|
- test =create_from_arroyo=
|
|
- parse this doc, create file and headings, and tags, check some tags from this document
|
|
- including top-level file-properties and filetags
|
|
- check and audit queries, consider making more instance methods
|
|
|
|
** Reference
|
|
|
|
#+BEGIN_SRC python :tangle roam/models.py
|
|
class Reference(EMOM('reference'), models.Model):
    """A reference string attached to a Heading."""

    heading = models.ForeignKey(
        Heading,
        on_delete=models.CASCADE,
        db_column="node_id",
    )
    ref = models.CharField(max_length=256)

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> List[Reference]:
        """Fetch or create a row for every ref of every ID'd heading.

        Headings without an ID, or without refs, are skipped.

        Raises:
            Heading.DoesNotExist: if a heading with refs has not been
                persisted yet.
        """
        rows: List[Reference] = []
        for heading in doc.headings or []:
            refs = heading.refs or []
            if heading.id is None or not refs:
                continue
            # Look the owning Heading up once per heading, not once per ref.
            heading_obj = Heading.objects.get(node_id=heading.id)
            for ref in refs:
                row, _created = cls.objects.get_or_create(heading=heading_obj, ref=ref)
                rows.append(row)
        return rows
|
|
#+END_SRC
|
|
|
|
*** NEXT Testing
|
|
|
|
- test =create_from_arroyo=
|
|
- parse this doc, create file, headings, and references, check some refs from this document
|
|
- including top-level file properties refs
|
|
- check and audit queries, consider making more instance methods
|
|
|
|
** Link
|
|
|
|
#+BEGIN_SRC python :tangle roam/models.py
|
|
class Link(EMOM('link'), models.Model):
    """An ``id:`` link from a heading in one file to another heading.

    ``dest_heading`` is nullable and has no database constraint, so a link
    can be stored before (or without) its destination heading being indexed.
    """

    title = models.TextField()
    source_file = models.ForeignKey(
        File,
        related_name="outbound_links",
        on_delete=models.CASCADE,
    )
    source_heading = models.ForeignKey(
        Heading,
        related_name="outbound_links",
        on_delete=models.CASCADE,
        to_field="node_id",
    )
    dest_heading = models.ForeignKey(
        Heading,
        related_name="inbound_links",
        on_delete=models.DO_NOTHING,
        db_constraint=False,
        null=True,
        default=None,
        to_field="node_id",
    )

    def __repr__(self) -> str:
        return f"<Link (from: {self.source_heading_id}, to: {self.dest_heading_id}, text: {self.title})>"

    def __str__(self) -> str:
        return self.__repr__()

    def to_backlink_html(self) -> str:
        """Render an anchor pointing back at the heading this link comes from.

        Falls back to a "dead-link" anchor targeting ``/404`` when the source
        heading cannot be loaded.

        NOTE(review): ``page_set.first()`` may return None, which would raise
        AttributeError on ``page.title`` -- confirm whether that case should
        also produce the dead link.
        """
        # Local stdlib imports keep this fix self-contained.
        from html import escape
        from urllib.parse import quote

        try:
            h = self.source_heading
            page = h.path.page_set.first()
            url = h.to_url()
            title = page.title
            return f'''<a class="internal" href="{url}">{title}</a>'''
        except Heading.DoesNotExist:
            logger.info(f"{self} does not have dest heading.")
            # The original interpolated ``self.title|iriencode`` -- Django
            # template filter syntax, which in Python is a bitwise OR with an
            # undefined name and raises NameError.  Percent-encode the title
            # for the query string and HTML-escape it for the anchor text.
            query_text = quote(self.title, safe="")
            link_text = escape(self.title)
            return f'''<a class="dead-link" href="/404?text={query_text}">{link_text}</a>'''

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> List[Link]:
        """Create and save a Link for every ``id:`` link found in ``doc``.

        A link is attributed to the nearest heading at or before it that has
        an ID.  Links with any other protocol are skipped with a warning, as
        are ``id:`` links that appear before any heading with an ID (these
        used to raise IndexError).

        Raises:
            File.DoesNotExist / Heading.DoesNotExist: if the source file or
                source heading has not been persisted yet.
        """
        ret: List[Link] = []
        # ID of the most recent heading that has one.  The original kept a
        # sliced "breadcrumb trail" but only ever read its last element,
        # which is exactly this value.
        source_id = None
        for heading in doc.headings or []:
            if heading.id is not None:
                source_id = heading.id
            for link in heading.links or []:
                if link.to_proto != "id":
                    # create a pseudo-link or a link that can be resolved using Reference?
                    logger.warning(f"Skipping non-id link {link}")
                    continue
                if source_id is None:
                    logger.warning(f"Skipping link {link}: no heading with an ID precedes it")
                    continue

                logger.debug(f"link: {link}")
                logger.debug(f"dest: {link.to}")

                obj = cls(title=(link.text or ""))
                obj.source_file = File.objects.get(path=doc.path)
                obj.source_heading = Heading.objects.get(node_id=source_id)
                # fudge this since we may be linking to Headings which are not yet indexed
                obj.dest_heading_id = link.to
                logger.warning(f"save maybe {obj}")
                obj.save()
                ret.append(obj)

        return ret
|
|
#+END_SRC
|
|
|
|
*** NEXT Testing
|
|
|
|
- test =create_from_arroyo=
|
|
- parse this doc, create file and headings, and validate the behavior of links through ones on this page
|
|
- internal ID links, external HTTP/s, shell commands and other Emacs clickables
|
|
- check and audit queries, consider making more instance methods
|
|
|
|
** NEXT Roam Heading Aliases
|
|
|
|
* Parsing and Persisting an org-mode document
|
|
:PROPERTIES:
|
|
:ID: 20231218T151642.210449
|
|
:ROAM_ALIASES: persist_one_file
|
|
:END:
|
|
=roam.core.persist_one_file= takes an [[id:20231023T115950.248543][arroyo_rs Native Org Parser]] document and does all the magic to store it in to the DB. Whether it needs to be updated is checked nearby. (this is in the [[id:20231217T154857.983742][Arcology ingest_files Command]] but should be moved out to here maybe, that thing is a bit of ball of mud.... dorodango reigns)
|
|
|
|
it would be nice to put these in a single class wrapping the native doc...
|
|
|
|
#+BEGIN_SRC python :tangle roam/core.py
|
|
from typing import Optional
|
|
|
|
from django.conf import settings
|
|
from django.db import transaction
|
|
from django.utils.module_loading import import_string
|
|
|
|
import arroyo.arroyo_rs as native
|
|
from roam.models import File, Keyword
|
|
|
|
import logging
|
|
logger = logging.getLogger(__name__)
|
|
# logger.setLevel(logging.DEBUG)
|
|
|
|
@transaction.atomic
def persist_one_file(doc: native.Document) -> File | None:
    """Store an already-parsed document and its relations in the DB.

    The File and Keyword rows are always written.  The extractors listed in
    ``settings.ARCOLOGY_EXTRACTORS`` only run when the document carries at
    least one ``ARCOLOGY_KEY`` keyword.  Runs inside a single transaction.

    Returns the File row for the document.
    """
    # unconditionally import the keywords in to the database so that
    # other Arroyo generators can work with unpublished documents
    model_doc = File.create_from_arroyo(doc)
    Keyword.create_from_arroyo(doc)

    # a document counts as Arcology-published when it has an ARCOLOGY_KEY
    is_published = len(list(doc.collect_keywords("ARCOLOGY_KEY"))) > 0
    if not is_published:
        logger.debug(f"{doc.path} is not published, skipping roam relations.\n")
        return model_doc

    # published: call every create_from_arroyo builder to persist the objects
    for model_name in settings.ARCOLOGY_EXTRACTORS:
        logger.debug(f"running {model_name}.create_from_arroyo")
        extractor = import_string(model_name)
        extractor.create_from_arroyo(doc)

    model_doc.refresh_from_db()
    return model_doc
|
|
|
|
@transaction.atomic
def arroyo_persist_one_file(doc: native.Document):
    """Run every extractor in ``settings.ARROYO_EXTRACTORS`` against ``doc``.

    The setting maps a label to a dotted import path; each imported object's
    ``create_from_arroyo`` is called with the document, all inside one
    transaction.
    """
    for label, dotted_path in settings.ARROYO_EXTRACTORS.items():
        logger.debug(f"{label}: {doc.path} time to go axis")
        extractor = import_string(dotted_path)
        extractor.create_from_arroyo(doc)
|
|
#+END_SRC
|
|
|
|
This =should_file_persist= function is from and for the [[id:20231217T154857.983742][Arcology ingest_files Command]] and that's okay. It returns a three-element tuple: two boolean values, =is_existing= and =need_update=, which are used to branch the behavior of that command, followed by the parsed document (or =None= when nothing was parsed).
|
|
|
|
#+BEGIN_SRC python :tangle roam/core.py
|
|
def should_file_persist(path: str) -> tuple[bool, bool, Optional[native.Document]]:
    """Decide whether the file at ``path`` must be (re-)persisted.

    Returns a tuple ``(is_existing, need_update, doc)``:

    - ``is_existing``: the file already has a row in the DB
    - ``need_update``: the file is new or its content changed
    - ``doc``: the parsed document when ``need_update`` is True, else None

    A document that cannot be parsed yields ``(False, False, None)``.
    """
    try:
        ifu = is_file_updated(path)
        if ifu is False:  # doc is existing and unaltered
            return (True, False, None)
        # ifu is None: never indexed; ifu is True: indexed but out of date.
        # Both need a fresh parse; they differ only in is_existing.
        return (ifu is not None, True, parse_doc(path))
    except native.InvalidDocError:  # doc can't be parsed
        return (False, False, None)
|
|
#+END_SRC
|
|
=is_file_updated= is used in the command to compare the hash of the file on-disk to the one stored in the DB by =File.create_from_arroyo=. It returns True if the file needs to be re-parsed, False if it does not, and None if it has never been indexed before. WTB a [[id:15656e1e-674d-454f-993c-28442550aae4][Mu]] datatype.
|
|
|
|
#+BEGIN_SRC python :tangle roam/core.py
|
|
def is_file_updated(path: str) -> Optional[bool]:
    """Compare the on-disk file with its stored digest.

    Returns True when the file changed since it was indexed, False when it
    did not, and None when there is no File row for ``path``.
    """
    try:
        existing = File.objects.get(path=path)
    except File.DoesNotExist:
        return None
    return bool(existing.hash_updated())
|
|
#+END_SRC
|
|
|
|
Document parsing is memoized using =functools.cache= because each changed document is parsed twice, once to get the core data models and one to run [[id:arroyo/django/generators][The Arroyo Generators]]' indexers. It's quite possible that I'll need to feed the file hash in to this later on if the =ingest_files= command is called repeatedly from the Syncthing client below in multiple successions.
|
|
|
|
#+begin_src python :tangle roam/core.py
|
|
from functools import cache
|
|
|
|
#XXX it's possible that this needs a cache-key with the doc hash later on.
|
|
@cache
def parse_doc(path: str) -> native.Document:
    """Parse the org file at ``path`` with the native parser.

    Results are memoized by ``functools.cache`` keyed on ``path`` alone, so a
    repeated call with the same path returns the earlier document without
    parsing the file again, even if the file has changed in the meantime.
    """
    return native.parse_file(path)
|
|
#+end_src
|
|
|
|
** NEXT Testing
|
|
|
|
Test =is_file_updated= and =should_file_persist=, these are scary.
|
|
|
|
Test parse_doc and the cache behavior somehow, that =XXX= above...
|
|
|
|
* NEXT split up the migration
|
|
|
|
#+BEGIN_SRC python :tangle roam/migrations/0001_base.py :noweb yes
|
|
# Generated by Django 4.2.6 on 2023-12-17 20:50
|
|
|
|
from django.db import migrations, models
|
|
import django.db.models.deletion
|
|
|
|
|
|
class Migration(migrations.Migration):
    # Squashed base migration for the roam app; it stands in for the five
    # migrations listed in ``replaces``.
    #
    # NOTE(review): no CreateModel for HeadingProperty appears here --
    # presumably it is added by a later migration; confirm.
    replaces = [
        ("roam", "0001_initial"),
        ("roam", "0002_link_file_inbound_files_heading_inbound_headings"),
        ("roam", "0003_remove_link_dest_file_remove_link_dest_heading_and_more"),
        ("roam", "0004_remove_file_inbound_files_remove_heading_id_and_more"),
        ("roam", "0005_alter_link_dest_heading"),
    ]

    dependencies = []

    operations = [
        # File: keyed by path, with the content digest alongside.
        migrations.CreateModel(
            name="File",
            fields=[
                (
                    "path",
                    models.CharField(max_length=512, primary_key=True, serialize=False),
                ),
                ("digest", models.CharField(max_length=512)),
            ],
        ),
        # Keyword: (path, keyword, value) triples, unique together.
        migrations.CreateModel(
            name="Keyword",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("keyword", models.CharField(max_length=512)),
                ("value", models.CharField(max_length=512)),
                (
                    "path",
                    models.ForeignKey(
                        db_column="path",
                        on_delete=django.db.models.deletion.CASCADE,
                        to="roam.file",
                    ),
                ),
            ],
            options={
                "unique_together": {("path", "keyword", "value")},
            },
        ),
        # Link is created before Heading (Heading's many-to-many goes
        # through it); its heading foreign keys are added further down.
        migrations.CreateModel(
            name="Link",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("title", models.TextField()),
                (
                    "dest_file",
                    models.ForeignKey(
                        default="",
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="inbound_links",
                        to="roam.file",
                    ),
                ),
                (
                    "source_file",
                    models.ForeignKey(
                        default="",
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="outbound_links",
                        to="roam.file",
                    ),
                ),
            ],
        ),
        # Heading: keyed by node_id, owned by a File.
        migrations.CreateModel(
            name="Heading",
            fields=[
                (
                    "node_id",
                    models.CharField(max_length=256, primary_key=True, serialize=False),
                ),
                ("level", models.IntegerField()),
                ("title", models.TextField()),
                (
                    "path",
                    models.ForeignKey(
                        db_column="path",
                        on_delete=django.db.models.deletion.CASCADE,
                        to="roam.file",
                    ),
                ),
                (
                    "inbound_headings",
                    models.ManyToManyField(
                        related_name="outbound_headings",
                        through="roam.Link",
                        to="roam.heading",
                    ),
                ),
            ],
        ),
        migrations.AddField(
            model_name="link",
            name="source_heading",
            field=models.ForeignKey(
                default="",
                on_delete=django.db.models.deletion.CASCADE,
                related_name="outbound_links",
                to="roam.heading",
            ),
            preserve_default=False,
        ),
        # dest_file is created above and dropped again here.
        migrations.RemoveField(
            model_name="link",
            name="dest_file",
        ),
        migrations.CreateModel(
            name="Reference",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("ref", models.CharField(max_length=256)),
                (
                    "heading",
                    models.ForeignKey(
                        db_column="node_id",
                        on_delete=django.db.models.deletion.CASCADE,
                        to="roam.heading",
                    ),
                ),
            ],
        ),
        migrations.CreateModel(
            name="Tag",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("tag", models.CharField(max_length=256)),
                (
                    "heading",
                    models.ForeignKey(
                        db_column="node_id",
                        on_delete=django.db.models.deletion.CASCADE,
                        to="roam.heading",
                    ),
                ),
            ],
            options={
                "unique_together": {("heading_id", "tag")},
            },
        ),
        # NOTE(review): this records on_delete=CASCADE for dest_heading,
        # while the Link model in this file declares DO_NOTHING -- confirm
        # a later migration reconciles the two.
        migrations.AddField(
            model_name="link",
            name="dest_heading",
            field=models.ForeignKey(
                db_constraint=False,
                default=None,
                null=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name="inbound_links",
                to="roam.heading",
            ),
        ),
    ]
|
|
#+END_SRC
|
|
|
|
#+begin_src python :tangle roam/migrations/__init__.py
|
|
#+end_src
|
|
|
|
* NEXT tests
|
|
|
|
** NEXT create a test harness for =create_from_arroyo= tests...
|
|
|
|
* Admin
|
|
|
|
#+begin_src python :tangle roam/admin.py
|
|
from django.contrib import admin
|
|
|
|
from django.contrib import admin
|
|
import roam.models
|
|
|
|
class KeywordInline(admin.TabularInline):
    """Tabular inline for Keyword rows."""
    model = roam.models.Keyword

class HeadingInline(admin.TabularInline):
    """Tabular inline for Heading rows."""
    model = roam.models.Heading

class LinkInline(admin.TabularInline):
    """Tabular inline for Link rows."""
    model = roam.models.Link
    # Link has two foreign keys to Heading; the inline follows the source side.
    fk_name = "source_heading"

class PropertyInline(admin.TabularInline):
    """Tabular inline for HeadingProperty rows."""
    model = roam.models.HeadingProperty

class TagInline(admin.TabularInline):
    """Tabular inline for Tag rows."""
    model = roam.models.Tag

class ReferenceInline(admin.TabularInline):
    """Tabular inline for Reference rows."""
    model = roam.models.Reference
|
|
|
|
|
|
@admin.register(roam.models.HeadingProperty)
class PropertyAdmin(admin.ModelAdmin):
    """Admin for HeadingProperty, listing heading, keyword and value."""
    list_display = ["heading", "keyword", "value"]


@admin.register(roam.models.Keyword)
class KeywordAdmin(admin.ModelAdmin):
    """Admin for Keyword, listing path, keyword and value."""
    list_display = ["path", "keyword", "value"]

@admin.register(roam.models.Reference)
class ReferenceAdmin(admin.ModelAdmin):
    """Admin for Reference, listing heading and ref."""
    list_display = ["heading", "ref"]

@admin.register(roam.models.Tag)
class TagAdmin(admin.ModelAdmin):
    """Admin for Tag, listing heading and tag."""
    list_display = ["heading", "tag"]

@admin.register(roam.models.File)
class FileAdmin(admin.ModelAdmin):
    """Admin for File, with its keywords and headings edited inline."""
    inlines = [
        KeywordInline,
        HeadingInline,
    ]

@admin.register(roam.models.Heading)
class HeadingAdmin(admin.ModelAdmin):
    """Admin for Heading, with tags, refs, properties and links inline."""
    list_display = ["node_id", "path"]
    inlines = [
        TagInline,
        ReferenceInline,
        PropertyInline,
        LinkInline,
    ]

# Link is registered with the default ModelAdmin.
admin.site.register(roam.models.Link)
|
|
#+end_src
|
|
|
|
* Views
|
|
|
|
there probably aren't any but for now:
|
|
|
|
#+begin_src python :tangle roam/views.py
|
|
from django.shortcuts import render
|
|
|
|
# Create your views here.
|
|
#+end_src
|
|
|
|
* The Rest
|
|
|
|
#+begin_src python :tangle roam/apps.py
|
|
from django.apps import AppConfig
|
|
|
|
|
|
class RoamConfig(AppConfig):
    """Django application configuration for the ``roam`` app."""

    # Models that do not declare a primary key get a BigAutoField.
    default_auto_field = "django.db.models.BigAutoField"
    name = "roam"
|
|
#+end_src
|
|
|
|
* NEXT move this in to [[id:arroyo/arroyo][Arroyo Systems Management]], along with the [[id:arroyo/django/generators][The Arroyo Generators]].
|