Compare commits

...

3 Commits

8 changed files with 360 additions and 157 deletions

View File

@ -854,7 +854,7 @@ Here's the top-matter:
#+begin_src rust
use anyhow::Result;
use pyo3::prelude::*;
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::convert::From;
use std::fs;
use std::io::{Error, Write};
@ -868,6 +868,13 @@ use orgize::{Element, Org};
The Exporter is controlled by passing in a struct with (currently) just one thing inside of it, a map of IDs -> public URLs to rewrite them in to.[fn:1:I'll still need to add a way to rewrite missing links in to 404/stub pages but for now they are just left as-is, but this is fine] This thing is a boilerplate =pyo3= class with a constructor attached to it.
The Exporter is controlled by passing in a structure with a few configuration options:
- =link_retargets= maps org IDs to public URLs to rewrite them for the web
- =ignore_tags= is a list of tags; any heading carrying one of these tags is left out of the final document, along with all of its children
- =limit_headings= is a set of org IDs; if this is not empty, the Exporter will *only* export these headings. This will be called "subheading mode"
- =include_subheadings= will instruct subheading mode to also export child headings underneath the ones indicated by =limit_headings=. One hopes the interaction of these two options in the code below will make the semantics clear.
#+begin_src rust
#[derive(Default, Debug, Clone)]
#[pyclass(dict)]
@ -875,14 +882,32 @@ pub struct ExportOptions {
/// id:{the_id} -> URL rewrites
#[pyo3(get)]
pub link_retargets: HashMap<String, String>,
#[pyo3(get)]
pub ignore_tags: HashSet<String>,
#[pyo3(get)]
pub limit_headings: HashSet<String>,
#[pyo3(get)]
pub include_subheadings: bool,
}
#[pymethods]
impl ExportOptions {
#[new]
fn new(link_retargets: HashMap<String, String>) -> Self {
fn new(
link_retargets: HashMap<String, String>,
ignore_tags: Vec<String>,
limit_headings: Vec<String>,
include_subheadings: Option<bool>,
) -> Self {
let mut lh2 = HashSet::new();
lh2.extend(limit_headings);
let mut tags = HashSet::new();
tags.extend(ignore_tags);
ExportOptions {
link_retargets,
limit_headings: lh2,
ignore_tags: tags,
include_subheadings: include_subheadings.unwrap_or(false),
..Default::default()
}
}
@ -908,7 +933,9 @@ pub struct ArroyoHtmlHandler<E: From<Error>, H: HtmlHandler<E>> {
/// handler error type
pub error_type: PhantomData<E>,
/// file-property drawer state tracking
in_drawer: bool,
current_drawer: Option<String>,
in_public_heading: bool,
heading_breadcrumbs: Vec<String>,
}
impl<E: From<Error>, H: HtmlHandler<E>> ArroyoHtmlHandler<E, H> {
@ -933,7 +960,10 @@ impl<E: From<Error>, H: HtmlHandler<E>> Default for ArroyoHtmlHandler<E, H> {
ArroyoHtmlHandler {
inner: H::default(),
error_type: PhantomData,
in_drawer: false,
current_drawer: None,
in_public_heading: false,
heading_breadcrumbs: vec![],
options: ExportOptions::default(),
}
}
@ -947,17 +977,74 @@ impl<E: From<Error>, H: HtmlHandler<E>> Default for ArroyoHtmlHandler<E, H> {
#+begin_src rust
impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for ArroyoHtmlHandler<E, H> {
fn start<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), E> {
if self.in_drawer {
return Ok(());
}
match &self.current_drawer {
None => {}
Some(drawer_name) => {
if vec![
String::from("PROPERTIES"),
String::from("REVIEW_DATA"),
String::from("LOGBOOK"),
]
.contains(&drawer_name)
{
return Ok(());
}
}
};
// if !self.in_public_heading {
// return Ok(());
// }
match element {
#+end_src
#+begin_src rust
Element::Title(title) => {
// title.tags
// let has_ignore_tag = title
// .tags
// .clone()
// .into_iter()
// .map(String::from)
// .find(|v| self.options.ignore_tags.contains(v))
// .is_some();
// if has_ignore_tag
let properties = title.properties.clone().into_hash_map();
let our_new_id = properties.get("ID");
let our_level = title.level;
self.heading_breadcrumbs.truncate(our_level);
match our_new_id.clone() {
None => {}
Some(id) => {
self.heading_breadcrumbs.push(id.to_string());
}
}
dbg!(&self.heading_breadcrumbs);
dbg!(&self.options.limit_headings);
let breadcrumb_set =
HashSet::from_iter(self.heading_breadcrumbs.clone().into_iter());
self.in_public_heading = self
.options
.limit_headings
.intersection(&breadcrumb_set)
.count()
!= 0;
dbg!(self.in_public_heading);
if self.in_public_heading {
self.inner.start(w, &element)?
}
}
#+end_src
Because =orgize= doesn't parse the file-level =PROPERTIES= drawer, it is elided from the export. [[id:2e31b385-a003-4369-a136-c6b78c0917e1][org-fc]] state drawers are elided, too, as are =LOGBOOK= drawers.
#+begin_src rust
Element::Drawer(drawer) => {
self.in_drawer = drawer.name == "PROPERTIES" || drawer.name == "REVIEW_DATA"
self.current_drawer = Some(drawer.name.to_string());
}
#+end_src
@ -973,11 +1060,14 @@ Text parsing is a bit weird to handle rewriting [[id:2e31b385-a003-4369-a136-c6b
",
)
.unwrap();
let after = re.replace_all(before, "<span class='fc-cloze' title='$2'>$1</span>");
if after.eq(before) {
self.inner.start(w, &Element::Text { value: after })?
} else {
write!(w, "{}", after)?
if self.in_public_heading {
let after =
re.replace_all(before, "<span class='fc-cloze' title='$2'>$1</span>");
if after.eq(before) {
self.inner.start(w, &Element::Text { value: after })?
} else {
write!(w, "{}", after)?
}
}
}
#+end_src
@ -992,20 +1082,23 @@ Link exporting is going to be the most complicated part of this because it does
None => ("", string_path),
};
let desc = link.desc.clone().unwrap_or(link.path.clone());
match proto {
"id" => write!(
w,
"<a href=\"{}\">{}</a>",
self.rewrite_link_from(&stripped_dest),
HtmlEscape(&desc),
)?,
"roam" => write!(
w,
"<a href=\"/404?key={}\">{}</a>",
HtmlEscape(&link.path),
HtmlEscape(&desc),
)?,
_ => self.inner.start(w, &Element::Link(link.clone()))?,
if self.in_public_heading {
match proto {
"id" => write!(
w,
"<a href=\"{}\">{}</a>",
self.rewrite_link_from(&stripped_dest),
HtmlEscape(&desc),
)?,
"roam" => write!(
w,
"<a href=\"/404?key={}\">{}</a>",
HtmlEscape(&link.path),
HtmlEscape(&desc),
)?,
_ => self.inner.start(w, &Element::Link(link.clone()))?,
}
}
}
#+end_src
@ -1013,7 +1106,11 @@ Link exporting is going to be the most complicated part of this because it does
Everything else is passed along to Syntect or the default HTML Handler.
#+begin_src rust
_ => self.inner.start(w, element)?,
_ => {
if self.in_public_heading {
self.inner.start(w, element)?
}
}
}
Ok(())
}
@ -1022,10 +1119,13 @@ Everything else is passed along to Syntect or the default HTML Handler.
match element {
// reset the drawer state tracking
Element::Drawer(_drawer) => {
self.in_drawer = false;
self.current_drawer = None;
}
_ => {
if self.in_drawer {
if !self.in_public_heading {
return Ok(());
}
if self.current_drawer.is_some() {
return Ok(());
}
self.inner.end(w, element)?
@ -1096,7 +1196,10 @@ For now maybe it is easier to assume that the headings are all in one file; that
The primary tension of the arroyo library now is that its design context is only in the realm of the arcology project's design goals. I need to start deciding whether a design goal of this library is to support non-arcology document systems. Surely interoperable but different document systems could be built on top of arroyo.
** First Pass
** CANCELLED First Pass
:LOGBOOK:
- State "CANCELLED" from [2024-02-04 Sun 16:02]
:END:
so the first pass of this API could take a file path, extract the feed metadata from keywords and heading properties; it could construct an entire atom feed, falling back to the custom HTML exporter to fill out the feed with text content. That's probably fine, and an API that other document servers could work with.
@ -1586,13 +1689,27 @@ impl Write for &mut InternalWriter {
}
#+end_src
** About the First Pass
I hate that code. It was worth a try, but it's not good; it's super janky. I'm going to add sub-heading support and compose the feeds on the Django side. This API is cleaner but with a different separation of concerns[citation needed]. This can be done by just adding an ExportOption and a struct state variable tracking whether the parser has reached a heading it should be exporting.
The Exporter design model is fine, the whole thing where you can nest them. But the code, my Rust ability, and the structure of the element iterator in the orgize library make it sort of bodgy and difficult to understand or change, even though there is a literate discussion surrounding it. A subheading export API can be unit tested in ways the exporter cannot.
so the second pass:
** Second API
there's a step further on, where an API takes a list of headings and feed metadata, and it parses each heading and its subheadings to HTML, *which is an API I already want to provide to document systems*. it could take arbitrary document headings provided through the public interface, and construct multi-page feeds.
there's another option, where an API takes a list of headings and feed metadata, and it parses each heading and its subheadings to HTML. *this is an API I already want to provide to document systems*, and should be written. it could take arbitrary document headings provided through the public interface, and construct multi-page feeds.
this requires the ability to export only a given subheading, which I could implement maybe more simply than the mess I wrote in the first pass.
or we could just clobber together a version of [[https://github.com/tanrax/RSSingle][RSSingle]]; [[id:personal_software_can_be_shitty][Personal Software Can Be Shitty]].
This API could be memoized on the Python side with =functools.cache= so that the headings could be exported.
this would allow me to microblog from my Journal, by allowing feeds to contain headings from arbitrary pages. this is Good. so let's do that.
** Hacky solution
we could just cobble together a version of [[https://github.com/tanrax/RSSingle][RSSingle]]; [[id:personal_software_can_be_shitty][Personal Software Can Be Shitty]].
** Future API
@ -1610,7 +1727,7 @@ use pyo3::prelude::*;
pub mod parse;
pub mod export_html;
pub mod export_atom;
// pub mod export_atom;
pub mod types;
#[pymodule]
@ -1625,10 +1742,10 @@ fn arroyo_rs(py: Python, m: &PyModule) -> PyResult<()> {
Ok(export_html::htmlize_file(path, options)?)
}
#[pyfn(m)]
fn atomize_file(path: String, options: export_html::ExportOptions) -> PyResult<String> {
Ok(export_atom::atomize_file(path, options)?)
}
// #[pyfn(m)]
// fn atomize_file(path: String, options: export_html::ExportOptions) -> PyResult<String> {
// Ok(export_atom::atomize_file(path, options)?)
// }
m.add_class::<types::Document>()?;
m.add_class::<types::Heading>()?;
@ -1786,7 +1903,7 @@ Stub package interface
#+begin_src python :tangle arroyo/__init__.py :mkdirp yes
from .arroyo_rs import parse_file, InvalidDocError
from .arroyo_rs import atomize_file, htmlize_file, ExportOptions
from .arroyo_rs import htmlize_file, ExportOptions
#+end_src
** Click command wrapper
@ -1803,9 +1920,10 @@ This is a stub, this module probably doesn't need to be runnable.
import os
import click
import glob
from typing import Optional
# from . import persist_one_file
from .arroyo_rs import atomize_file, htmlize_file, ExportOptions
from .arroyo_rs import htmlize_file, ExportOptions
# from . import models
# from sqlmodel import Session
#+end_src
@ -1843,21 +1961,17 @@ def generate_db(source, dest, file_glob):
@cli.command()
@click.option("--file", "-f", help="The file to export")
def export_document(file):
@click.option("--limit-headings", "-H", multiple=True, help="org ID to export")
@click.option("--include-subheadings", "-I", help="when headings are specified, this will control whether to export child headings")
def export_document(file, limit_headings: Optional[set] = None, include_subheadings=False):
# in The Real World this is loaded from DB and generated.
options = ExportOptions(
link_retargets = {"currently_reading": "https://rix.si/hello-world"}
link_retargets = {"currently_reading": "https://rix.si/hello-world"},
limit_headings = limit_headings,
include_subheadings = include_subheadings,
ignore_tags = [],
)
print(htmlize_file(file, options))
@cli.command()
@click.option("--file", "-f", help="The file to export")
def atomize_document(file):
# in The Real World this is loaded from DB and generated.
options = ExportOptions(
link_retargets = {"currently_reading": "https://rix.si/hello-world"}
)
print(atomize_file(file, options))
#+end_src
#+begin_src python

View File

@ -1,4 +1,4 @@
# [[file:../arroyo-native-parser.org::*Python Package][Python Package:1]]
from .arroyo_rs import parse_file, InvalidDocError
from .arroyo_rs import atomize_file, htmlize_file, ExportOptions
from .arroyo_rs import htmlize_file, ExportOptions
# Python Package:1 ends here

View File

@ -2,9 +2,10 @@
import os
import click
import glob
from typing import Optional
# from . import persist_one_file
from .arroyo_rs import atomize_file, htmlize_file, ExportOptions
from .arroyo_rs import htmlize_file, ExportOptions
# from . import models
# from sqlmodel import Session
# Click command wrapper:1 ends here
@ -40,21 +41,17 @@ def generate_db(source, dest, file_glob):
@cli.command()
@click.option("--file", "-f", help="The file to export")
def export_document(file):
@click.option("--limit-headings", "-H", multiple=True, help="org ID to export")
@click.option("--include-subheadings", "-I", help="when headings are specified, this will control whether to export child headings")
def export_document(file, limit_headings: Optional[set] = None, include_subheadings=False):
# in The Real World this is loaded from DB and generated.
options = ExportOptions(
link_retargets = {"currently_reading": "https://rix.si/hello-world"}
link_retargets = {"currently_reading": "https://rix.si/hello-world"},
limit_headings = limit_headings,
include_subheadings = include_subheadings,
ignore_tags = [],
)
print(htmlize_file(file, options))
@cli.command()
@click.option("--file", "-f", help="The file to export")
def atomize_document(file):
# in The Real World this is loaded from DB and generated.
options = ExportOptions(
link_retargets = {"currently_reading": "https://rix.si/hello-world"}
)
print(atomize_file(file, options))
# Click command wrapper:2 ends here
# [[file:../arroyo-native-parser.org::*Click command wrapper][Click command wrapper:3]]

View File

@ -1,4 +1,4 @@
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:1]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:1]]
use anyhow::Result;
use regex;
use std::borrow::Cow;
@ -11,9 +11,9 @@ use orgize::{elements, Element, Org};
use crate::export_html::ArroyoHtmlHandler;
use crate::export_html::ExportOptions;
// First Pass:1 ends here
// CANCELLED First Pass:1 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:2]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:2]]
pub struct ArroyoAtomHandler<E: From<Error>, H: HtmlHandler<E>> {
pub options: ExportOptions,
pub inner: ArroyoHtmlHandler<E, H>,
@ -61,9 +61,9 @@ impl<E: From<Error>, H: HtmlHandler<E>> Default for ArroyoAtomHandler<E, H> {
}
}
}
// First Pass:2 ends here
// CANCELLED First Pass:2 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:3]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:3]]
pub fn atomize_file(path: String, options: ExportOptions) -> Result<String> {
let syntect_handler = SyntectHtmlHandler::new(DefaultHtmlHandler);
let html_handler = ArroyoHtmlHandler::new(options.clone(), syntect_handler);
@ -90,74 +90,74 @@ pub fn atomize_file(path: String, options: ExportOptions) -> Result<String> {
org_tree.write_html_custom(&mut vec, &mut handler)?;
Ok(String::from_utf8(vec)?)
}
// First Pass:3 ends here
// CANCELLED First Pass:3 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:4]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:4]]
impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for ArroyoAtomHandler<E, H> {
fn start<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), E> {
(match element {
// First Pass:4 ends here
// CANCELLED First Pass:4 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:5]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:5]]
Element::Document { .. } => self.start_document(w, element),
// First Pass:5 ends here
// CANCELLED First Pass:5 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:6]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:6]]
Element::Keyword(kw) => self.start_keyword(w, kw),
// First Pass:6 ends here
// CANCELLED First Pass:6 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:7]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:7]]
Element::Title(title) => self.start_title(w, title),
// First Pass:7 ends here
// CANCELLED First Pass:7 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:8]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:8]]
Element::Drawer(drawer) => {
self.in_drawer = drawer.name == "PROPERTIES" && self.feed_page_id.eq("");
self.start_rest(w, element)
}
Element::Text { value } => self.start_text(w, value),
// First Pass:8 ends here
// CANCELLED First Pass:8 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:9]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:9]]
_t => self.start_rest(w, element),
})
.unwrap(); // if we can't parse something, just fucken panic.
// First Pass:9 ends here
// CANCELLED First Pass:9 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:10]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:10]]
Ok(())
}
fn end<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), E> {
(match element {
// First Pass:10 ends here
// CANCELLED First Pass:10 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:11]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:11]]
// Element::Title(_title) => {}
Element::Document { .. } => self.end_document(w, element),
// First Pass:11 ends here
// CANCELLED First Pass:11 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:12]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:12]]
Element::Drawer(drawer) => {
self.in_drawer = false;
self.end_rest(w, element)
}
// First Pass:12 ends here
// CANCELLED First Pass:12 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:13]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:13]]
_ => self.end_rest(w, element),
})
.ok();
Ok(())
}
}
// First Pass:13 ends here
// CANCELLED First Pass:13 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:14]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:14]]
impl<E: From<Error>, H: HtmlHandler<E>> ArroyoAtomHandler<E, H> {
// First Pass:14 ends here
// CANCELLED First Pass:14 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:15]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:15]]
fn start_document<W: Write>(&mut self, mut w: W, _document: &elements::Element) -> Result<()> {
Ok(write!(
w,
@ -178,9 +178,9 @@ impl<E: From<Error>, H: HtmlHandler<E>> ArroyoAtomHandler<E, H> {
)?;
Ok(())
}
// First Pass:15 ends here
// CANCELLED First Pass:15 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:16]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:16]]
fn start_title<W: Write>(&mut self, mut w: W, title: &elements::Title) -> Result<()> {
let ignore_tags = vec![
String::from("noexport"),
@ -254,9 +254,9 @@ impl<E: From<Error>, H: HtmlHandler<E>> ArroyoAtomHandler<E, H> {
Ok(())
}
}
// First Pass:16 ends here
// CANCELLED First Pass:16 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:17]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:17]]
fn start_keyword<W: Write>(&mut self, mut w: W, kw: &elements::Keyword) -> Result<()> {
// dbg!(kw);
match kw.key.as_ref() {
@ -292,9 +292,9 @@ impl<E: From<Error>, H: HtmlHandler<E>> ArroyoAtomHandler<E, H> {
}
Ok(())
}
// First Pass:17 ends here
// CANCELLED First Pass:17 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:18]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:18]]
fn start_text<W: Write>(&mut self, mut w: W, text: &Cow<str>) -> Result<()> {
if self.in_drawer == true {
let (_, prop_drawer): (_, orgize::elements::PropertiesMap) =
@ -346,7 +346,7 @@ impl<E: From<Error>, H: HtmlHandler<E>> ArroyoAtomHandler<E, H> {
Ok(())
}
}
// First Pass:18 ends here
// CANCELLED First Pass:18 ends here
// [[file:../arroyo-native-parser.org::*Strip Links from Strings][Strip Links from Strings:1]]
fn strip_links_from_str(in_str: &str) -> Result<String> {

View File

@ -1,7 +1,7 @@
// [[file:../arroyo-native-parser.org::*The HTML exporter][The HTML exporter:1]]
// [[file:../arroyo-native-parser.org::*The Arroyo HTML exporter][The Arroyo HTML exporter:1]]
use anyhow::Result;
use pyo3::prelude::*;
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::convert::From;
use std::fs;
use std::io::{Error, Write};
@ -11,23 +11,41 @@ use regex::Regex;
use orgize::export::{DefaultHtmlHandler, HtmlEscape, HtmlHandler, SyntectHtmlHandler};
use orgize::{Element, Org};
// The HTML exporter:1 ends here
// The Arroyo HTML exporter:1 ends here
// [[file:../arroyo-native-parser.org::*The HTML exporter][The HTML exporter:2]]
// [[file:../arroyo-native-parser.org::*The Arroyo HTML exporter][The Arroyo HTML exporter:2]]
/// Configuration for the Arroyo HTML exporter, exposed to Python as a `pyo3` class.
///
/// Every field is readable from Python through a generated getter (`#[pyo3(get)]`).
#[derive(Default, Debug, Clone)]
#[pyclass(dict)]
pub struct ExportOptions {
/// id:{the_id} -> URL rewrites
#[pyo3(get)]
pub link_retargets: HashMap<String, String>,
/// Heading tags intended to exclude a heading from the export.
/// NOTE(review): the tag check in the handler's `Title` arm is commented out,
/// so this set does not appear to be consulted yet — confirm.
#[pyo3(get)]
pub ignore_tags: HashSet<String>,
/// Org IDs of the headings to export. The handler emits output only while the
/// current heading breadcrumb trail shares at least one ID with this set.
/// NOTE(review): with an empty set that intersection is always empty, so it
/// looks like nothing would be exported — confirm against the intended
/// "only limit when non-empty" behaviour.
#[pyo3(get)]
pub limit_headings: HashSet<String>,
/// Whether child headings below the `limit_headings` entries are exported too.
/// NOTE(review): no visible handler code reads this flag yet — confirm.
#[pyo3(get)]
pub include_subheadings: bool,
}
#[pymethods]
impl ExportOptions {
#[new]
fn new(link_retargets: HashMap<String, String>) -> Self {
fn new(
link_retargets: HashMap<String, String>,
ignore_tags: Vec<String>,
limit_headings: Vec<String>,
include_subheadings: Option<bool>,
) -> Self {
let mut lh2 = HashSet::new();
lh2.extend(limit_headings);
let mut tags = HashSet::new();
tags.extend(ignore_tags);
ExportOptions {
link_retargets,
limit_headings: lh2,
ignore_tags: tags,
include_subheadings: include_subheadings.unwrap_or(false),
..Default::default()
}
}
@ -41,9 +59,9 @@ impl ExportOptions {
// Self::__repr__(slf)
// }
}
// The HTML exporter:2 ends here
// The Arroyo HTML exporter:2 ends here
// [[file:../arroyo-native-parser.org::*The HTML exporter][The HTML exporter:3]]
// [[file:../arroyo-native-parser.org::*The Arroyo HTML exporter][The Arroyo HTML exporter:3]]
pub struct ArroyoHtmlHandler<E: From<Error>, H: HtmlHandler<E>> {
pub options: ExportOptions,
/// inner html handler
@ -51,7 +69,9 @@ pub struct ArroyoHtmlHandler<E: From<Error>, H: HtmlHandler<E>> {
/// handler error type
pub error_type: PhantomData<E>,
/// file-property drawer state tracking
in_drawer: bool,
current_drawer: Option<String>,
in_public_heading: bool,
heading_breadcrumbs: Vec<String>,
}
impl<E: From<Error>, H: HtmlHandler<E>> ArroyoHtmlHandler<E, H> {
@ -76,29 +96,88 @@ impl<E: From<Error>, H: HtmlHandler<E>> Default for ArroyoHtmlHandler<E, H> {
ArroyoHtmlHandler {
inner: H::default(),
error_type: PhantomData,
in_drawer: false,
current_drawer: None,
in_public_heading: false,
heading_breadcrumbs: vec![],
options: ExportOptions::default(),
}
}
}
// The HTML exporter:3 ends here
// The Arroyo HTML exporter:3 ends here
// [[file:../arroyo-native-parser.org::*The Custom HTML Exporter Extensions][The Custom HTML Exporter Extensions:1]]
impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for ArroyoHtmlHandler<E, H> {
fn start<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), E> {
if self.in_drawer {
return Ok(());
}
match &self.current_drawer {
None => {}
Some(drawer_name) => {
if vec![
String::from("PROPERTIES"),
String::from("REVIEW_DATA"),
String::from("LOGBOOK"),
]
.contains(&drawer_name)
{
return Ok(());
}
}
};
// if !self.in_public_heading {
// return Ok(());
// }
match element {
// The Custom HTML Exporter Extensions:1 ends here
// [[file:../arroyo-native-parser.org::*The Custom HTML Exporter Extensions][The Custom HTML Exporter Extensions:2]]
Element::Drawer(drawer) => {
self.in_drawer = drawer.name == "PROPERTIES" || drawer.name == "REVIEW_DATA"
Element::Title(title) => {
// title.tags
// let has_ignore_tag = title
// .tags
// .clone()
// .into_iter()
// .map(String::from)
// .find(|v| self.options.ignore_tags.contains(v))
// .is_some();
// if has_ignore_tag
let properties = title.properties.clone().into_hash_map();
let our_new_id = properties.get("ID");
let our_level = title.level;
self.heading_breadcrumbs.truncate(our_level);
match our_new_id.clone() {
None => {}
Some(id) => {
self.heading_breadcrumbs.push(id.to_string());
}
}
dbg!(&self.heading_breadcrumbs);
dbg!(&self.options.limit_headings);
let breadcrumb_set =
HashSet::from_iter(self.heading_breadcrumbs.clone().into_iter());
self.in_public_heading = self
.options
.limit_headings
.intersection(&breadcrumb_set)
.count()
!= 0;
dbg!(self.in_public_heading);
if self.in_public_heading {
self.inner.start(w, &element)?
}
}
// The Custom HTML Exporter Extensions:2 ends here
// [[file:../arroyo-native-parser.org::*The Custom HTML Exporter Extensions][The Custom HTML Exporter Extensions:3]]
Element::Drawer(drawer) => {
self.current_drawer = Some(drawer.name.to_string());
}
// The Custom HTML Exporter Extensions:3 ends here
// [[file:../arroyo-native-parser.org::*The Custom HTML Exporter Extensions][The Custom HTML Exporter Extensions:4]]
Element::Text { value: before } => {
let re = Regex::new(
r"(?x)
@ -108,16 +187,19 @@ impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for ArroyoHtmlHandler<E,
",
)
.unwrap();
let after = re.replace_all(before, "<span class='fc-cloze' title='$2'>$1</span>");
if after.eq(before) {
self.inner.start(w, &Element::Text { value: after })?
} else {
write!(w, "{}", after)?
if self.in_public_heading {
let after =
re.replace_all(before, "<span class='fc-cloze' title='$2'>$1</span>");
if after.eq(before) {
self.inner.start(w, &Element::Text { value: after })?
} else {
write!(w, "{}", after)?
}
}
}
// The Custom HTML Exporter Extensions:3 ends here
// The Custom HTML Exporter Extensions:4 ends here
// [[file:../arroyo-native-parser.org::*The Custom HTML Exporter Extensions][The Custom HTML Exporter Extensions:4]]
// [[file:../arroyo-native-parser.org::*The Custom HTML Exporter Extensions][The Custom HTML Exporter Extensions:5]]
Element::Link(link) => {
let string_path = link.path.to_string();
let (proto, stripped_dest) = match string_path.split_once(':') {
@ -125,26 +207,33 @@ impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for ArroyoHtmlHandler<E,
None => ("", string_path),
};
let desc = link.desc.clone().unwrap_or(link.path.clone());
match proto {
"id" => write!(
w,
"<a href=\"{}\">{}</a>",
self.rewrite_link_from(&stripped_dest),
HtmlEscape(&desc),
)?,
"roam" => write!(
w,
"<a href=\"/404?key={}\">{}</a>",
HtmlEscape(&link.path),
HtmlEscape(&desc),
)?,
_ => self.inner.start(w, &Element::Link(link.clone()))?,
if self.in_public_heading {
match proto {
"id" => write!(
w,
"<a href=\"{}\">{}</a>",
self.rewrite_link_from(&stripped_dest),
HtmlEscape(&desc),
)?,
"roam" => write!(
w,
"<a href=\"/404?key={}\">{}</a>",
HtmlEscape(&link.path),
HtmlEscape(&desc),
)?,
_ => self.inner.start(w, &Element::Link(link.clone()))?,
}
}
}
// The Custom HTML Exporter Extensions:4 ends here
// The Custom HTML Exporter Extensions:5 ends here
// [[file:../arroyo-native-parser.org::*The Custom HTML Exporter Extensions][The Custom HTML Exporter Extensions:5]]
_ => self.inner.start(w, element)?,
// [[file:../arroyo-native-parser.org::*The Custom HTML Exporter Extensions][The Custom HTML Exporter Extensions:6]]
_ => {
if self.in_public_heading {
self.inner.start(w, element)?
}
}
}
Ok(())
}
@ -153,10 +242,13 @@ impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for ArroyoHtmlHandler<E,
match element {
// reset the drawer state tracking
Element::Drawer(_drawer) => {
self.in_drawer = false;
self.current_drawer = None;
}
_ => {
if self.in_drawer {
if !self.in_public_heading {
return Ok(());
}
if self.current_drawer.is_some() {
return Ok(());
}
self.inner.end(w, element)?
@ -165,9 +257,9 @@ impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for ArroyoHtmlHandler<E,
Ok(())
}
}
// The Custom HTML Exporter Extensions:5 ends here
// The Custom HTML Exporter Extensions:6 ends here
// [[file:../arroyo-native-parser.org::*The Public Interface][The Public Interface:1]]
// [[file:../arroyo-native-parser.org::*The API Interface][The API Interface:1]]
// sure would be nice..... some day i'll understand lifetimes enough
// to write a function that goes path -> orgize::Org
// use crate::parse::orgize_document;
@ -195,4 +287,4 @@ pub fn htmlize_file(path: String, options: ExportOptions) -> Result<String> {
org_tree.write_html_custom(&mut vec, &mut handler)?;
Ok(String::from_utf8(vec)?)
}
// The Public Interface:1 ends here
// The API Interface:1 ends here

View File

@ -1,9 +1,9 @@
// [[file:../arroyo-native-parser.org::*Library definition and exports for the Python library][Library definition and exports for the Python library:1]]
// [[file:../arroyo-native-parser.org::*Library definition and exports for the native Python library][Library definition and exports for the native Python library:1]]
use pyo3::prelude::*;
pub mod parse;
pub mod export_html;
pub mod export_atom;
// pub mod export_atom;
pub mod types;
#[pymodule]
@ -18,10 +18,10 @@ fn arroyo_rs(py: Python, m: &PyModule) -> PyResult<()> {
Ok(export_html::htmlize_file(path, options)?)
}
#[pyfn(m)]
fn atomize_file(path: String, options: export_html::ExportOptions) -> PyResult<String> {
Ok(export_atom::atomize_file(path, options)?)
}
// #[pyfn(m)]
// fn atomize_file(path: String, options: export_html::ExportOptions) -> PyResult<String> {
// Ok(export_atom::atomize_file(path, options)?)
// }
m.add_class::<types::Document>()?;
m.add_class::<types::Heading>()?;
@ -33,4 +33,4 @@ fn arroyo_rs(py: Python, m: &PyModule) -> PyResult<()> {
Ok(())
}
// Library definition and exports for the Python library:1 ends here
// Library definition and exports for the native Python library:1 ends here

View File

@ -1,4 +1,4 @@
// [[file:../arroyo-native-parser.org::*The Parser][The Parser:1]]
// [[file:../arroyo-native-parser.org::*The Arroyo Org Parser][The Arroyo Org Parser:1]]
use anyhow::Result;
use itertools::Itertools;
use lexpr;
@ -10,7 +10,7 @@ use std::{error::Error, fs};
// use std::collections::HashMap;
use crate::types::{Document, Heading, InvalidDocError, Keyword, Link};
// The Parser:1 ends here
// The Arroyo Org Parser:1 ends here
// [[file:../arroyo-native-parser.org::*The public interface][The public interface:1]]
pub fn parse_document(path: String) -> Result<Document> {
@ -293,7 +293,7 @@ fn split_quoted_string(quoted_str: String) -> Result<Vec<String>, Box<dyn Error>
}
// =split_quoted_string=:1 ends here
// [[file:../arroyo-native-parser.org::*Tests][Tests:1]]
// [[file:../arroyo-native-parser.org::*Code Unit Tests][Code Unit Tests:1]]
#[cfg(test)]
mod tests {
use std::assert_eq;
@ -367,4 +367,4 @@ mod tests {
);
}
}
// Tests:1 ends here
// Code Unit Tests:1 ends here

View File

@ -1,4 +1,4 @@
# [[file:../arroyo-native-parser.org::*Tests][Tests:2]]
# [[file:../arroyo-native-parser.org::*Code Unit Tests][Code Unit Tests:2]]
import arroyo.arroyo_rs
import arroyo.models
@ -41,4 +41,4 @@ def test_relationships():
# assert(headings[0].node_id == '20231023T115950.248543')
# assert(headings[1].node_id == None)
# assert(headings[1].text == "Overview")
# Tests:2 ends here
# Code Unit Tests:2 ends here