# arcology/roam/models.py
#
# 285 lines
# 8.9 KiB
# Python

# [[file:../roam.org::*Org-Roam Caching Models][Org-Roam Caching Models:1]]
from __future__ import annotations
import hashlib
from typing import List, Set
from django.db import models
from django.conf import settings
from django.utils.module_loading import import_string
from django_prometheus.models import ExportModelOperationsMixin as EMOM
import arroyo.arroyo_rs as native
import logging
logger = logging.getLogger(__name__)
# Org-Roam Caching Models:1 ends here
# [[file:../roam.org::*File][File:1]]
def calculate_hash(path: str) -> str:
    """Return the hex-encoded SHA-256 digest of the file at ``path``.

    The file is read in fixed-size chunks so that large files are never held
    in memory all at once.

    Args:
        path: Filesystem path of the file to hash.

    Returns:
        The digest as a 64-character lowercase hexadecimal string.

    Raises:
        OSError: If ``path`` cannot be opened or read.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # iter() with a sentinel stops on the first empty read, i.e. at EOF.
        for chunk in iter(lambda: f.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
class File(EMOM('file'), models.Model):
    """Cache row for one file, keyed by its path and carrying a content digest."""

    # Path of the file; doubles as the primary key.
    path = models.CharField(max_length=512, primary_key=True)
    # Hex SHA-256 of the file contents, as produced by calculate_hash().
    digest = models.CharField(max_length=512)
    # inbound_files = models.ManyToManyField(
    #     "File",
    #     through="Link",
    #     related_name="outbound_files",
    #     through_fields=("dest_file", "source_file")
    # )

    def hash_updated(self) -> bool:
        """Return True when the file on disk no longer matches the stored digest.

        Raises:
            OSError: If the file at ``self.path`` cannot be read.
        """
        file_hash = calculate_hash(self.path)
        logger.debug("old: %s new: %s", self.digest, file_hash)
        return file_hash != self.digest

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> File:
        """Return the File row for ``doc.path``, creating or refreshing it.

        The lookup is by primary key only. Including ``digest`` in the lookup
        (as a plain ``get_or_create`` would) makes an existing row with a
        stale digest miss the ``get`` and then collide on the primary key
        during ``create``; instead the digest is written through ``defaults``
        so a stale row is updated in place.

        Raises:
            OSError: If the file at ``doc.path`` cannot be read for hashing.
        """
        file_obj, _created = cls.objects.update_or_create(
            path=doc.path,
            defaults={"digest": calculate_hash(doc.path)},
        )
        return file_obj
# File:1 ends here
# [[file:../roam.org::*Keyword][Keyword:1]]
class Keyword(EMOM('keyword'), models.Model):
    """One keyword/value pair recorded against a File."""

    class Meta:
        # XXX: how do I get out of this? I think I just have to assume that
        # there will be duplicates, that these cannot be unique, and that
        # they have to be filter()'d for.
        unique_together = (("path", "keyword", "value"),)

    # Owning file; rows are removed together with it (CASCADE).
    path = models.ForeignKey(File, on_delete=models.CASCADE, db_column="path")
    keyword = models.CharField(max_length=512)
    value = models.CharField(max_length=512)

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> List[Keyword]:
        """Get or create a row for every allowed keyword in ``doc``.

        A keyword is allowed when its name appears in
        ``settings.ROAM_ALLOWED_KEYWORDS``; all others are ignored.

        Returns:
            The Keyword rows, in the order the keywords appear in ``doc``.
        """
        rows = []
        for entry in doc.keywords:
            if entry.keyword not in settings.ROAM_ALLOWED_KEYWORDS:
                continue
            row, _created = cls.objects.get_or_create(
                path=File(path=entry.file),
                keyword=entry.keyword,
                value=entry.value,
            )
            rows.append(row)
        return rows
# Keyword:1 ends here
# [[file:../roam.org::*Heading][Heading:1]]
class Heading(EMOM('heading'), models.Model):
    """A heading that carries an ID, stored with its level, title and file."""

    node_id = models.CharField(max_length=256, primary_key=True)
    level = models.IntegerField()
    title = models.TextField()
    # Owning file; headings are removed together with it (CASCADE).
    path = models.ForeignKey(File, on_delete=models.CASCADE, db_column="path")
    # Headings linking to this one, via Link rows. The reverse accessor
    # (outbound_headings) is created by this same field.
    inbound_headings = models.ManyToManyField(
        "Heading",
        through="Link",
        related_name="outbound_headings",
        through_fields=("dest_heading", "source_heading"),
    )

    def to_url(self) -> str:
        """Return the URL of this heading.

        A level-0 heading maps to the page URL itself; any other level maps
        to the page URL with ``#<node_id>`` appended.
        """
        # NOTE(review): page_set.first() presumably returns None when the
        # file has no page, which would raise AttributeError here - confirm.
        base_url = self.path.page_set.first().to_url()
        return base_url if self.level == 0 else f"{base_url}#{self.node_id}"

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> List[Heading]:
        """Get or create a row for every heading in ``doc`` that has an ID.

        Headings whose ``id`` is None are skipped.

        Returns:
            The Heading rows, in document order.
        """
        rows = []
        for node in doc.headings or []:
            if node.id is None:
                continue
            row, _created = cls.objects.get_or_create(
                node_id=node.id,
                level=node.level,
                title=node.text,
                path=File(path=doc.path),
            )
            rows.append(row)
        return rows
# Heading:1 ends here
# [[file:../roam.org::*Properties][Properties:1]]
class HeadingProperty(EMOM('heading_property'), models.Model):
    """One key/value property attached to a Heading."""

    # Owning heading; properties are removed together with it (CASCADE).
    heading = models.ForeignKey(
        Heading,
        on_delete=models.CASCADE,
        db_column="node_id",
    )
    keyword = models.CharField(max_length=256)
    value = models.CharField(max_length=256)

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> List[HeadingProperty]:
        """Get or create a row for every property of every ID'd heading in ``doc``.

        Headings whose ``id`` is None are skipped, as are headings without
        properties.

        Returns:
            The HeadingProperty rows, in document order.

        Raises:
            Heading.DoesNotExist: If a heading that has properties has not
                been stored yet.
        """
        rows = []
        for heading in doc.headings or []:
            if heading.id is None:
                continue
            properties = heading.properties or {}
            if not properties:
                continue
            # One lookup per heading rather than one per property.
            owner = Heading.objects.get(node_id=heading.id)
            for key, value in properties.items():
                row, _created = cls.objects.get_or_create(
                    heading=owner, keyword=key, value=value
                )
                rows.append(row)
        return rows
# Properties:1 ends here
# [[file:../roam.org::*Tag][Tag:1]]
class Tag(EMOM('tag'), models.Model):
    """A tag attached to a Heading; each (heading, tag) pair is unique."""

    class Meta:
        unique_together = (("heading_id", "tag"),)

    # Owning heading; tags are removed together with it (CASCADE).
    heading = models.ForeignKey(
        Heading,
        on_delete=models.CASCADE,
        db_column="node_id",
    )
    tag = models.CharField(max_length=256)

    def related_pages(self) -> Set['arcology.models.Page']:
        """Return the distinct pages that carry the same tag name as this row."""
        return set(self.__class__.pages_by_name(self.tag))

    @classmethod
    def weighted_pages_by_name(cls, tag_name):
        """Map each page carrying ``tag_name`` to how many tag rows point at it."""
        weights = {}
        for page in cls.pages_by_name(tag_name):
            weights[page] = weights.get(page, 0) + 1
        return weights

    @classmethod
    def pages_by_name(cls, tag_name: str) -> List['arcology.models.Page']:
        """Return one page per tag row named ``tag_name``.

        The same page appears once for every tagged heading it contains.
        """
        # NOTE(review): page_set.first() presumably yields None for a file
        # without a page, so the list may contain None - confirm.
        return [
            tag_obj.heading.path.page_set.first()
            for tag_obj in cls.objects.filter(tag=tag_name).distinct()
        ]

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> List[Tag]:
        """Get or create a row for every tag of every ID'd heading in ``doc``.

        Headings whose ``id`` is None are skipped, as are headings without
        tags.

        Returns:
            The Tag rows, in document order.

        Raises:
            Heading.DoesNotExist: If a tagged heading has not been stored yet.
        """
        rows = []
        for heading in doc.headings or []:
            if heading.id is None:
                continue
            tags = heading.tags or []
            if not tags:
                continue
            # One lookup per heading rather than one per tag.
            owner = Heading.objects.get(node_id=heading.id)
            for tag in tags:
                row, _created = cls.objects.get_or_create(heading=owner, tag=tag)
                rows.append(row)
        return rows
# Tag:1 ends here
# [[file:../roam.org::*Reference][Reference:1]]
class Reference(EMOM('reference'), models.Model):
    """A reference string (``ref``) attached to a Heading."""

    # Owning heading; references are removed together with it (CASCADE).
    heading = models.ForeignKey(
        Heading,
        on_delete=models.CASCADE,
        db_column="node_id",
    )
    ref = models.CharField(max_length=256)

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> List[Reference]:
        """Get or create a row for every ref of every ID'd heading in ``doc``.

        Headings whose ``id`` is None are skipped, as are headings without
        refs.

        Returns:
            The Reference rows, in document order.

        Raises:
            Heading.DoesNotExist: If a heading that has refs has not been
                stored yet.
        """
        rows = []
        for heading in doc.headings or []:
            if heading.id is None:
                continue
            refs = heading.refs or []
            if not refs:
                continue
            # One lookup per heading rather than one per ref.
            owner = Heading.objects.get(node_id=heading.id)
            for ref in refs:
                row, _created = cls.objects.get_or_create(heading=owner, ref=ref)
                rows.append(row)
        return rows
# Reference:1 ends here
# [[file:../roam.org::*Link][Link:1]]
class Link(EMOM('link'), models.Model):
    """An ``id:`` link from a heading in one file to another heading.

    The destination is stored without a database constraint, so a Link may
    point at a heading ID that has no Heading row.
    """

    title = models.TextField()
    source_file = models.ForeignKey(
        File,
        related_name="outbound_links",
        on_delete=models.CASCADE,
    )
    source_heading = models.ForeignKey(
        Heading,
        related_name="outbound_links",
        on_delete=models.CASCADE,
        to_field="node_id",
    )
    # No DB constraint and DO_NOTHING on delete: the target heading may be
    # missing at the time the link is saved.
    dest_heading = models.ForeignKey(
        Heading,
        related_name="inbound_links",
        on_delete=models.DO_NOTHING,
        db_constraint=False,
        null=True,
        default=None,
        to_field="node_id",
    )

    def __repr__(self) -> str:
        return f"<Link (from: {self.source_heading_id}, to: {self.dest_heading_id}, text: {self.title})>"

    def __str__(self) -> str:
        return repr(self)

    def to_backlink_html(self) -> str:
        """Render an HTML anchor pointing back at the heading this link comes from.

        Returns:
            An ``internal`` anchor to the source heading's URL, labelled with
            its page title; or a ``dead-link`` anchor to ``/404`` when the
            source heading row cannot be loaded.
        """
        # Imported locally so this block does not depend on the module's
        # import section being edited.
        from urllib.parse import quote

        try:
            h = self.source_heading
            page = h.path.page_set.first()
            url = h.to_url()
            title = page.title
            return f'''<a class="internal" href="{url}">{title}</a>'''
        except Heading.DoesNotExist:
            # NOTE(review): the lookup that fails here is source_heading,
            # although the message speaks of the dest heading - confirm.
            logger.info(f"{self} does not have dest heading.")
            # The original interpolated `self.title|iriencode`, which is
            # Django template syntax; inside an f-string it is a bitwise-or
            # with an undefined name and raises NameError. Percent-encode
            # the title for use as a query-string value instead.
            # NOTE(review): titles are interpolated into HTML unescaped in
            # both branches - confirm callers treat the result as trusted.
            encoded_title = quote(self.title, safe="")
            return f'''<a class="dead-link" href="/404?text={encoded_title}">{self.title}</a>'''

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> List[Link]:
        """Create and save a Link for every ``id:`` link found in ``doc``.

        Each link is attributed to the most recent heading, in document
        order, that has an ID. (The original kept a breadcrumb list sliced by
        heading level, but only ever read its last entry, which is exactly
        that heading.) Links with any other protocol are skipped with a
        warning, as are ``id:`` links that appear before any heading with an
        ID. A new row is saved on every call; existing rows are not reused.

        Returns:
            The newly saved Link rows, in document order.

        Raises:
            File.DoesNotExist: If ``doc.path`` has no File row.
            Heading.DoesNotExist: If the source heading has no Heading row.
        """
        source_id = None
        source_heading = None  # fetched lazily, once per source heading
        source_file = None  # fetched lazily, once per document
        ret = []
        for heading in doc.headings or []:
            if heading.id is not None:
                source_id = heading.id
                source_heading = None
            for link in heading.links or []:
                if link.to_proto != "id":
                    # TODO: create a pseudo-link, or a link that can be
                    # resolved using Reference?
                    logger.warning(f"Skipping non-id link {link}")
                    continue
                logger.debug(f"link: {link}")
                logger.debug(f"dest: {link.to}")
                if source_id is None:
                    logger.warning(
                        f"Skipping link {link}: no preceding heading with an ID in {doc.path}"
                    )
                    continue
                if source_file is None:
                    source_file = File.objects.get(path=doc.path)
                if source_heading is None:
                    source_heading = Heading.objects.get(node_id=source_id)
                # Fudge the destination: it may be a Heading which is not
                # yet indexed, so only its raw ID is stored.
                obj = cls(
                    title=(link.text or ""),
                    source_file=source_file,
                    source_heading=source_heading,
                    dest_heading_id=link.to,
                )
                logger.warning(f"save maybe {obj}")
                obj.save()
                ret.append(obj)
        return ret
# Link:1 ends here