# arcology/arcology/models.py
#
# 305 lines
# 9.8 KiB
# Python

# [[file:../arcology.org::*Data Models for Sites, Web Features, and Feeds][Data Models for Sites, Web Features, and Feeds:1]]
from __future__ import annotations
from typing import Optional, List
from django.db import models
from django.conf import settings
from django_prometheus.models import ExportModelOperationsMixin as EMOM
import arrow
import arroyo.arroyo_rs as native
from arcology.cache_decorator import cache
import roam.models
import logging
# Module-level logger; its threshold is WARNING, so the info-level messages
# emitted from this module are dropped unless the level is lowered.
logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)
# used for some memoization
class hashabledict(dict):
    """A ``dict`` that defines ``__hash__`` so it can be used as a hashable value.

    The hash is taken over the sorted ``(key, value)`` pairs, so two instances
    with equal contents hash identically regardless of insertion order.  Keys
    must be mutually orderable and values must be hashable.
    """

    def __hash__(self) -> int:
        ordered_items = sorted(self.items())
        return hash(tuple(ordered_items))
# Data Models for Sites, Web Features, and Feeds:1 ends here
# [[file:../arcology.org::*Site][Site:1]]
# Sites and SiteDomains are created in django-admin or a seed rather than from arroyo parser, no create_from_arroyo..!
class Site(EMOM('site'), models.Model):
    """A published site, identified by the first segment of a route key.

    Sites and SiteDomains are created in django-admin or a seed rather than
    by the arroyo parser, which is why there is no ``create_from_arroyo`` here.
    """

    key = models.CharField(max_length=512, primary_key=True)
    title = models.CharField(max_length=512)
    # add choices
    # NOTE(review): blank=True with default=None but without null=True looks
    # like an unset value would be sent to the database as NULL -- confirm
    # whether default="" or null=True was intended (either needs a migration).
    css_file = models.CharField(max_length=512, blank=True, default=None)
    # this is used in sitemap, and maybe links..
    link_color = models.CharField(max_length=8, blank=True, default=None)

    def _urlize_route_key(self, route_key: str) -> str:
        """Build the https URL for *route_key* on this site's first domain.

        The leading ``<site key>/`` segment of the route key is dropped and the
        remainder becomes the URL path.

        Raises:
            AttributeError: if the site has no SiteDomain (``first()`` is None).
            IndexError: if *route_key* contains no ``/``.
        """
        domain = self.sitedomain_set.first().domain
        key_rest = route_key.split("/", 1)[1]
        return f"https://{domain}/{key_rest}"

    def urlize_page(self, page: Page, heading: Optional[roam.models.Heading] = None) -> str:
        """Return the absolute URL of *page*, anchored at *heading* if given."""
        url = self._urlize_route_key(page.route_key)
        if heading is not None:
            url = f"{url}#{heading.node_id}"
        return url

    def urlize_feed(self, feed: Feed) -> str:
        """Return the absolute URL of *feed*."""
        return self._urlize_route_key(feed.route_key)

    @classmethod
    def from_route(cls, route_key: str) -> Site:
        """Look up the Site whose key is the first ``/``-separated segment of *route_key*.

        Raises:
            Site.DoesNotExist: if no Site has that key (``objects.get`` never
                returns None, so no further check is needed).
        """
        site_key = route_key.split("/")[0]
        return cls.objects.get(key=site_key)

    @classmethod
    def from_request(cls, request) -> Site:
        """Look up the Site that has a SiteDomain equal to the request's Host header.

        Raises:
            Site.DoesNotExist: if no SiteDomain matches.  This is raised
                explicitly rather than asserted so the check still runs when
                Python is started with ``-O``.
        """
        host = request.headers.get("Host")
        site = cls.objects.filter(sitedomain__domain=host).first()
        if site is None:
            raise cls.DoesNotExist(f"No Site is configured for host {host!r}")
        return site
class SiteDomain(EMOM('site_domain'), models.Model):
    """A domain name attached to a Site; a Site may have several of these."""

    # Deleting the Site removes its domains as well.
    site = models.ForeignKey(Site, on_delete=models.CASCADE)
    domain = models.CharField(max_length=512)
# Site:1 ends here
# [[file:../arcology.org::*Page][Page:1]]
class Page(EMOM('page'), models.Model):
    """A routable web page backed by one roam ``File``.

    ``route_key`` has the form ``<site key>/<path>``: the first segment selects
    the ``Site`` and the remainder becomes the URL path.
    """

    file = models.ForeignKey(
        roam.models.File,
        on_delete=models.CASCADE,
    )
    route_key = models.CharField(max_length=512, primary_key=True)
    # The file's level-0 heading (see create_from_arroyo).
    root_heading = models.ForeignKey(roam.models.Heading, on_delete=models.CASCADE)
    site = models.ForeignKey(
        Site,
        on_delete=models.CASCADE,
    )
    title = models.CharField(max_length=512)
    # True when the document carries an ARCOLOGY_ALLOW_CRAWL keyword.
    allow_crawl = models.BooleanField(default=False)

    def to_url(self):
        """Return the absolute URL of this page on its site."""
        return self.site.urlize_page(self)

    def to_url_path(self):
        """Return the URL path: the route key with its leading site segment removed."""
        key_rest = self.route_key.split("/", 1)[1]
        return f"/{key_rest}"

    def collect_keywords(self):
        """Return ``self.file.keyword_set`` unchanged (not evaluated here)."""
        return self.file.keyword_set

    def collect_tags(self):
        """Return a flat list of the tags on every heading of this page's file."""
        # Prefetch so the tags arrive in one extra query, not one per heading.
        headings = self.file.heading_set.prefetch_related("tag_set")
        return [tag for heading in headings for tag in heading.tag_set.all()]

    def collect_references(self):
        """Return a flat list of the references on every heading of this page's file."""
        # Prefetch so the references arrive in one extra query, not one per heading.
        headings = self.file.heading_set.prefetch_related("reference_set")
        return [
            reference
            for heading in headings
            for reference in heading.reference_set.all()
        ]

    def collect_links(self):
        """Map heading node ids to URLs for this file's headings and its link targets.

        Starts with every heading of the file, then adds the destination
        heading of each outbound link.  Links whose destination heading does
        not exist are skipped.
        """
        ret = {h.node_id: h.to_url() for h in self.file.heading_set.all()}
        for el in self.file.outbound_links.all():
            try:
                h = el.dest_heading
                url = h.to_url()
                ret[h.node_id] = url
                # Lazy %-args: nothing is formatted while INFO is disabled.
                logger.info("link %s from %s", url, el)
            except roam.models.Heading.DoesNotExist:
                logger.info("%s does not have dest", el)
        return ret

    def collect_backlinks(self) -> set[roam.models.Link]:
        """Return the set of Links whose destination is a heading of this page's file."""
        my_headings = self.file.heading_set.all()
        return set(roam.models.Link.objects.filter(dest_heading__in=my_headings))

    def to_html(self, links, heading=None, include_subheadings=False):
        """Render the page's file to HTML through the memoized exporter.

        *links* is wrapped in ``hashabledict`` and the file digest is passed
        along as an extra argument to the memoized call.
        """
        return self._to_html_memoized(
            hashabledict(links), heading, include_subheadings, self.file.digest
        )

    @cache(key_prefix="page_html", expire_secs=60*60*24*7)
    def _to_html_memoized(self, links, heading, include_subheadings, _file_digest):
        """Export ``self.file.path`` to HTML with the native exporter.

        ``_file_digest`` is not used in the body; presumably it only takes part
        in the cache key so a changed file is re-rendered -- confirm against
        the ``cache`` decorator.
        """
        # An empty limit list is passed when no single heading was requested.
        headings = [heading] if heading is not None else []
        opts = native.ExportOptions(
            link_retargets=links,
            limit_headings=headings,
            include_subheadings=include_subheadings,
            ignore_tags=settings.IGNORED_ROAM_TAGS,
        )
        return native.htmlize_file(self.file.path, opts)

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> Page:
        """Get or create the Page described by a parsed document.

        The route key comes from the first ARCOLOGY_KEY keyword (``""`` if
        absent) and the title from the file's level-0 heading.

        Raises:
            roam.models.File.DoesNotExist: if no File has ``doc.path``.
            Site.DoesNotExist: if the route key's site segment matches no Site.
            IndexError: if the file has no level-0 heading.
        """
        f = roam.models.File.objects.get(path=doc.path)
        route_key = next(iter(doc.collect_keywords("ARCOLOGY_KEY")), "")
        # Any value at all for the keyword enables crawling; only absence disables it.
        allow_crawl = (
            next(iter(doc.collect_keywords("ARCOLOGY_ALLOW_CRAWL")), False) is not False
        )
        site = Site.from_route(route_key)
        root_heading = f.heading_set.filter(level=0)[0]
        title = root_heading.title or ""
        # NOTE(review): every field is part of the lookup, so an existing row
        # with the same route_key (the primary key) but e.g. a different title
        # will not match and the create will hit the key constraint -- confirm
        # that callers clear stale rows before re-ingesting.
        page, _created = cls.objects.get_or_create(
            file=f,
            route_key=route_key,
            allow_crawl=allow_crawl,
            site=site,
            root_heading=root_heading,
            title=title,
        )
        return page
# Page:1 ends here
# [[file:../arcology.org::*Feed][Feed:1]]
class Feed(EMOM('feed'), models.Model):
    """A feed published from one roam ``File`` under a route key.

    ``route_key`` has the form ``<site key>/<path>``; the first segment selects
    the ``Site``.
    """

    POST_VISIBILITY = [
        ("unlisted", "Unlisted"),
        ("private", "Private"),
        ("public", "Public"),
        ("direct", "direct"),  # might be different, XXX
    ]
    file = models.ForeignKey(
        roam.models.File,
        on_delete=models.CASCADE,
    )
    route_key = models.CharField(max_length=512, primary_key=True)
    site = models.ForeignKey(
        Site,
        on_delete=models.CASCADE,
    )
    title = models.CharField(max_length=512)
    visibility = models.CharField(max_length=512, choices=POST_VISIBILITY)

    def url(self):
        """Return the absolute URL of this feed on its site."""
        return self.site.urlize_feed(self)

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> Feed | None:
        """Get or create the Feed described by a parsed document.

        Returns None when the document has no (non-empty) ARCOLOGY_FEED
        keyword.  Visibility comes from the first ARCOLOGY_TOOT_VISIBILITY
        keyword and defaults to ``"private"``.

        Raises:
            roam.models.File.DoesNotExist: if no File has ``doc.path``.
            Site.DoesNotExist: if the route key's site segment matches no Site.
            IndexError: if the file has no level-0 heading.
        """
        route_key = next(iter(doc.collect_keywords("ARCOLOGY_FEED")), None)
        if not route_key:
            return None
        visibility = next(
            iter(doc.collect_keywords("ARCOLOGY_TOOT_VISIBILITY")), "private"
        )
        f = roam.models.File.objects.get(path=doc.path)
        site = Site.from_route(route_key)
        root_heading = f.heading_set.filter(level=0)[0]
        # Fall back to "" like Page.create_from_arroyo does: the title column
        # is not nullable, so a missing heading title must not be stored as None.
        title = root_heading.title or ""
        feed, _created = cls.objects.get_or_create(
            file=f,
            route_key=route_key,
            title=title,
            visibility=visibility,
            site=site,
        )
        return feed

    @classmethod
    async def aget(cls, **kwargs):
        """Asynchronously fetch one Feed with ``file`` and ``site`` already loaded.

        Both relations are foreign keys, so they are joined into the same query
        with ``select_related`` and need no follow-up queries of their own.
        """
        return await cls.objects.select_related("file", "site").aget(**kwargs)
# Feed:1 ends here
# [[file:../arcology.org::*FeedEntry][FeedEntry:1]]
class FeedEntry(EMOM('feed_entry'), models.Model):
    """One entry of a Feed, backed by a single roam ``Heading`` with a PUBDATE property."""

    POST_VISIBILITY = [
        ("unlisted", "Unlisted"),
        ("private", "Private"),
        ("public", "Public"),
        ("direct", "direct"),  # might be different, XXX
    ]
    heading = models.ForeignKey(
        roam.models.Heading,
        on_delete=models.CASCADE,
    )
    feed = models.ForeignKey(
        Feed,
        on_delete=models.CASCADE,
    )
    # Same route key as the owning feed (see create_from_arroyo).
    route_key = models.CharField(max_length=512)
    site = models.ForeignKey(
        Site,
        on_delete=models.CASCADE,
    )
    title = models.CharField(max_length=512)
    visibility = models.CharField(max_length=512, choices=POST_VISIBILITY)
    pubdate = models.DateTimeField(auto_now=False)

    def to_html(self, links):
        """Render this entry's heading to HTML through the memoized exporter.

        *links* is wrapped in ``hashabledict`` and ``self.heading.path.digest``
        is passed along as an extra argument to the memoized call.
        """
        return self._to_html_memoized(hashabledict(links), self.heading.path.digest)

    @cache(key_prefix="feedentry_html", expire_secs=60*60*24*7)
    def _to_html_memoized(self, links, _file_digest):
        """Export only this entry's heading, with its subheadings, to HTML.

        ``_file_digest`` is not used in the body; presumably it only takes part
        in the cache key so a changed file is re-rendered -- confirm against
        the ``cache`` decorator.
        """
        opts = native.ExportOptions(
            link_retargets=links,
            limit_headings=[self.heading.node_id],
            include_subheadings=True,
            ignore_tags=settings.IGNORED_ROAM_TAGS,
        )
        # heading.path is dereferenced again for .path; presumably it is the
        # File the heading belongs to -- verify against roam.models.Heading.
        return native.htmlize_file(self.heading.path.path, opts)

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> List[FeedEntry] | None:
        """Get or create one FeedEntry per dated heading of a parsed feed document.

        Returns None when the document has no (non-empty) ARCOLOGY_FEED
        keyword.  Headings without an id or without a PUBDATE property are
        skipped.  Visibility comes from the first ARCOLOGY_TOOT_VISIBILITY
        keyword and defaults to ``"private"``.

        Raises:
            Site.DoesNotExist: if the route key's site segment matches no Site.
            Feed.DoesNotExist: if no Feed exists for the route key yet.
            roam.models.Heading.DoesNotExist: if a heading id has no Heading row.
        """
        route_key = next(iter(doc.collect_keywords("ARCOLOGY_FEED")), None)
        if not route_key:
            return None
        visibility = next(
            iter(doc.collect_keywords("ARCOLOGY_TOOT_VISIBILITY")), "private"
        )
        site = Site.from_route(route_key)
        feed = Feed.objects.get(route_key=route_key)

        entries = []
        for nheading in doc.headings:
            if nheading.id is None:
                continue
            heading = roam.models.Heading.objects.get(node_id=nheading.id)
            # One query: first() is None when there is no PUBDATE property.
            pubdate_prop = heading.headingproperty_set.filter(keyword="PUBDATE").first()
            if pubdate_prop is None:
                continue
            pubdate = arrow.get(pubdate_prop.value, "YYYY-MM-DD ddd H:mm").format(
                arrow.FORMAT_RFC3339
            )
            entry, _created = cls.objects.get_or_create(
                heading=heading,
                feed=feed,
                route_key=route_key,
                title=heading.title,
                pubdate=pubdate,
                visibility=visibility,
                site=site,
            )
            entries.append(entry)
        return entries
# FeedEntry:1 ends here