Compare commits

...

5 Commits

9 changed files with 698 additions and 465 deletions

View File

@ -229,6 +229,7 @@ Let's start by defining the types. We use =pyo3= macro annotations so that these
use pyo3::exceptions::PyException;
use pyo3::prelude::*;
use pyo3::pyclass;
use std::collections::HashMap;
use std::fmt;
@ -360,6 +361,8 @@ pub struct Heading {
#[pyo3(get)]
pub text: String,
#[pyo3(get)]
pub properties: HashMap<String, String>,
#[pyo3(get)]
pub tags: Option<Vec<String>>,
#[pyo3(get)]
pub refs: Option<Vec<String>>,
@ -374,13 +377,14 @@ impl fmt::Display for Heading {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"Heading(id={}, title={}, {} tags, {} refs, {} aliases, {} links)",
"Heading(id={}, title={}, {} tags, {} refs, {} aliases, {} links, props {:?})",
self.id.clone().unwrap_or("None".to_owned()),
self.text,
self.tags.clone().unwrap_or(vec![]).len(),
self.refs.clone().unwrap_or(vec![]).len(),
self.aliases.clone().unwrap_or(vec![]).len(),
self.links.clone().unwrap_or(vec![]).len(),
self.properties.clone(),
)
}
}
@ -475,10 +479,16 @@ pub fn parse_document(path: String) -> Result<Document> {
&org,
&orgize::ParseConfig {
// Need to pull these from environment or options...
todo_keywords: (vec!["NEXT".to_string(), "INPROGRESS".to_string(), "WAITING".to_string()],
vec!["DONE".to_string(), "CANCELLED".to_string()]),
todo_keywords: (
vec![
"NEXT".to_string(),
"INPROGRESS".to_string(),
"WAITING".to_string(),
],
vec!["DONE".to_string(), "CANCELLED".to_string()],
),
..Default::default()
}
},
);
let keywords = extract_metadata(path.clone(), org_tree)?;
let headings = extract_headings(path.clone(), org_tree)?;
@ -532,57 +542,57 @@ There are some mutable variables at the top of this function which are used for
- =inherited_tags= is a list of lists of strings; the inner vector contains the list of tags for each header, starting at level 0 for =FILETAGS= entries. Combining this structure and =cur_level= allows the parser to perform tag inheritance by flattening the list, and by dropping everything "above" the current level when stepping to another header.
#+begin_src rust
let mut in_drawer: bool = false;
let mut id_crumbs: Vec<Option<String>> = Vec::new();
let mut cur_id: Option<String> = None;
let mut cur_level: usize = 0;
let mut headings: Vec<Heading> = Vec::new();
headings.push(Heading::default());
let mut links: HashMap<String, Vec<Link>> = HashMap::new();
let mut inherited_tags: Vec<Vec<String>> = Vec::new();
let mut in_drawer: bool = false;
let mut id_crumbs: Vec<Option<String>> = Vec::new();
let mut cur_id: Option<String> = None;
let mut cur_level: usize = 0;
let mut headings: Vec<Heading> = Vec::new();
headings.push(Heading::default());
let mut links: HashMap<String, Vec<Link>> = HashMap::new();
let mut inherited_tags: Vec<Vec<String>> = Vec::new();
#+END_SRC
=FILETAG= parsing is a bit nasty to read, but basically the Keyword's value is a colon-separated list of strings; these are split and collected and stored in the root heading.
#+begin_src rust
// file level metadata + filetags
let file_metadata = extract_metadata(path.clone(), tree)?;
let filetags = match file_metadata
.iter()
.find(|kw| kw.keyword.to_lowercase() == "filetags")
{
Some(kw) => kw
.value
.split(':')
.map(|s| s.to_string())
.filter(|s| !s.is_empty())
.collect(),
_ => Vec::<String>::new(),
};
headings[0].tags = Some(filetags.clone());
// file level metadata + filetags
let file_metadata = extract_metadata(path.clone(), tree)?;
let filetags = match file_metadata
.iter()
.find(|kw| kw.keyword.to_lowercase() == "filetags")
{
Some(kw) => kw
.value
.split(':')
.map(|s| s.to_string())
.filter(|s| !s.is_empty())
.collect(),
_ => Vec::<String>::new(),
};
headings[0].tags = Some(filetags.clone());
#+END_SRC
The root heading's title is the value of the document's =#+TITLE= keyword:
#+begin_src rust
// Extract document title and apply to level 0 heading
let doc_title = match file_metadata
.iter()
.find(|kw| kw.keyword.to_lowercase() == "title")
{
Some(kw) => kw.value.clone(),
_ => String::from(""),
};
headings[0].text = doc_title;
// Extract document title and apply to level 0 heading
let doc_title = match file_metadata
.iter()
.find(|kw| kw.keyword.to_lowercase() == "title")
{
Some(kw) => kw.value.clone(),
_ => String::from(""),
};
headings[0].text = doc_title;
#+END_SRC
And now we step in to the state machine. It iterates over each element, providing an =Event::Start= and =Event::End= for each element that the parser supports:
#+begin_src rust
// state machine go brrr
tree.iter()
.map(|event| {
match event {
// state machine go brrr
tree.iter()
.map(|event| {
match event {
#+END_SRC
*** Heading parser
@ -594,50 +604,55 @@ tree.iter()
- Stash the heading in the return vector
#+begin_src rust
Event::Start(orgize::Element::Title(title)) => {
let properties = title.properties.clone().into_hash_map();
cur_id = properties.get("ID").map(|id| id.clone().into());
Event::Start(orgize::Element::Title(title)) => {
let tmp_properties = title.properties.clone().into_hash_map();
let mut export_properties: HashMap<String, String> = HashMap::new();
tmp_properties.iter().for_each(|(k, v)| {
export_properties.insert(k.to_string(), v.to_string());
});
cur_id = export_properties.get("ID").cloned();
id_crumbs.truncate(cur_level + 1);
id_crumbs.push(cur_id.clone());
id_crumbs.truncate(cur_level + 1);
id_crumbs.push(cur_id.clone());
let refs = properties
.get("ROAM_REFS")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
let aliases = properties
.get("ROAM_ALIASES")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
cur_level = title.level;
let refs = export_properties
.get("ROAM_REFS")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
let aliases = export_properties
.get("ROAM_ALIASES")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
cur_level = title.level;
// reset the tags table
inherited_tags.truncate(cur_level - 1);
let new_tags: Vec<String> = title
.tags
.iter()
.map(|mbox| mbox.clone().to_string())
.collect();
inherited_tags.push(new_tags);
// reset the tags table
inherited_tags.truncate(cur_level - 1);
let new_tags: Vec<String> = title
.tags
.iter()
.map(|mbox| mbox.clone().to_string())
.collect();
inherited_tags.push(new_tags);
let most_tags = inherited_tags.concat();
let all_tags: Vec<String> = [filetags.clone(), most_tags].concat();
let most_tags = inherited_tags.concat();
let all_tags: Vec<String> = [filetags.clone(), most_tags].concat();
let h = Heading {
id: cur_id.clone(),
level: cur_level,
text: title.raw.to_string(),
tags: match all_tags.len() {
0 => None,
_ => Some(all_tags),
},
refs,
aliases,
..Default::default()
};
headings.push(h);
Ok(())
}
let h = Heading {
id: cur_id.clone(),
level: cur_level,
text: title.raw.to_string(),
tags: match all_tags.len() {
0 => None,
_ => Some(all_tags),
},
properties: export_properties,
refs,
aliases,
..Default::default()
};
headings.push(h);
Ok(())
}
#+END_SRC
**** NEXT I should be doing something like the =inherited_tags= stuff to track =cur_id= inheritance...
@ -655,10 +670,10 @@ Handling the file-level properties drawer is a bit of a pain -- some day I'll ro
When entering a drawer, the parser sets that =in_drawer= state variable; This is a bit boogy since in theory this could be a floating =PROPERTIES= drawer defined anywhere, but my org-mode docs are shaped reasonably enough that we'll cross that rubicon when someone else uses this.
#+begin_src rust
Event::Start(orgize::Element::Drawer(drawer)) => {
in_drawer = drawer.name == "PROPERTIES" && headings[0].id.is_none();
Ok(())
}
Event::Start(orgize::Element::Drawer(drawer)) => {
in_drawer = drawer.name == "PROPERTIES" && headings[0].id.is_none();
Ok(())
}
#+END_SRC
If the parser encounters a Text block while inside of a drawer, that needs to be parsed, and then the keys and whatnot are shoved in to the root Heading.
@ -666,55 +681,55 @@ If the parser encounters a Text block while inside of a drawer, that needs to be
The drawer is assumed to be a key/value list as in the =PROPERTIES= drawers; this relies on my fork of =orgize= which exposes =parse_drawer_content=. I *think* this should be able to use =prop_drawer.get= as in the code handling =Headings= above, and then these should be de-duplicated.
#+begin_src rust
Event::Start(orgize::Element::Text { value }) => {
if in_drawer {
// this is where we rely on forked orgize
let (_, prop_drawer): (_, orgize::elements::PropertiesMap) =
orgize::elements::Drawer::parse_drawer_content(value)
.expect("failed to parse properties drawer");
let properties = prop_drawer.into_hash_map();
Event::Start(orgize::Element::Text { value }) => {
if in_drawer {
// this is where we rely on forked orgize
let (_, prop_drawer): (_, orgize::elements::PropertiesMap) =
orgize::elements::Drawer::parse_drawer_content(value)
.expect("failed to parse properties drawer");
let properties = prop_drawer.into_hash_map();
// update cur_id and heading 0 ID since this is
// implied to be the first drawer, but it's kind
// of :yikes: to think about it like that! we
// could be genius enough to have a floating
// PROPERTIES drawer that would muck things up
cur_id = properties.get("ID").map(|s| s.to_string());
if cur_id.is_none() {
cur_id = properties.get("CUSTOM_ID").map(|s| s.to_string())
}
// update cur_id and heading 0 ID since this is
// implied to be the first drawer, but it's kind
// of :yikes: to think about it like that! we
// could be genius enough to have a floating
// PROPERTIES drawer that would muck things up
cur_id = properties.get("ID").map(|s| s.to_string());
if cur_id.is_none() {
cur_id = properties.get("CUSTOM_ID").map(|s| s.to_string())
}
id_crumbs = vec![cur_id.clone()];
headings[0].id = cur_id.clone();
id_crumbs = vec![cur_id.clone()];
headings[0].id = cur_id.clone();
headings[0].aliases = properties
.get("ROAM_ALIASES")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
headings[0].refs = properties
.get("ROAM_REFS")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
}
headings[0].aliases = properties
.get("ROAM_ALIASES")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
headings[0].refs = properties
.get("ROAM_REFS")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
}
if headings[0].id.is_none() {
return Err(InvalidDocError::new_err(format!(
"Root ID is None in {}",
path
)));
}
if headings[0].id.is_none() {
return Err(InvalidDocError::new_err(format!(
"Root ID is None in {}",
path
)));
}
Ok(())
}
Ok(())
}
#+END_SRC
When we exit the Drawer, the state value is cleared.
#+begin_src rust
Event::End(orgize::Element::Drawer(_drawer)) => {
in_drawer = false;
Ok(())
}
Event::End(orgize::Element::Drawer(_drawer)) => {
in_drawer = false;
Ok(())
}
#+END_SRC
**** NEXT fix orgize to expose file-level propertiesmap
@ -726,40 +741,40 @@ Look; I'm gonna be honest here. I don't remember why the links are stored outsid
(maybe because they may have None IDs in the from_id?)
#+begin_src rust
// Stash links outside the match block in a HashMap shape
// of heading id -> list of links; it would be nice if the
// match block returned an Option<Link> but that doesn't
// play well with the rest of the state machine
Event::Start(orgize::Element::Link(link)) => {
let dest = link.path.to_string();
let (proto, stripped_dest): (Option<String>, String) =
match dest.split_once(':') {
Some((proto, stripped_dest)) => {
(Some(proto.to_string()), stripped_dest.to_string())
}
None => (None, dest.clone()),
};
// Stash links outside the match block in a HashMap shape
// of heading id -> list of links; it would be nice if the
// match block returned an Option<Link> but that doesn't
// play well with the rest of the state machine
Event::Start(orgize::Element::Link(link)) => {
let dest = link.path.to_string();
let (proto, stripped_dest): (Option<String>, String) =
match dest.split_once(':') {
Some((proto, stripped_dest)) => {
(Some(proto.to_string()), stripped_dest.to_string())
}
None => (None, dest.clone()),
};
let last_non_none = match id_crumbs.iter().rev().find_map(|x| x.clone()) {
Some(last_non_none) => last_non_none,
None => {
return Err(InvalidDocError::new_err(format!(
"no non-none ID in {}",
path
)));
}
};
let last_non_none = match id_crumbs.iter().rev().find_map(|x| x.clone()) {
Some(last_non_none) => last_non_none,
None => {
return Err(InvalidDocError::new_err(format!(
"no non-none ID in {}",
path
)));
}
};
let link_list = links.entry(last_non_none.clone()).or_insert(Vec::new());
link_list.push(Link {
from_file: path.clone().to_string(),
from_id: last_non_none.clone(),
to: stripped_dest.clone(),
to_proto: proto.clone(),
text: link.desc.clone().map(String::from),
});
Ok(())
}
let link_list = links.entry(last_non_none.clone()).or_insert(Vec::new());
link_list.push(Link {
from_file: path.clone().to_string(),
from_id: last_non_none.clone(),
to: stripped_dest.clone(),
to_proto: proto.clone(),
text: link.desc.clone().map(String::from),
});
Ok(())
}
#+END_SRC
*** NEXT Attachment and image caching
@ -854,7 +869,7 @@ Here's the top-matter:
#+begin_src rust
use anyhow::Result;
use pyo3::prelude::*;
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::convert::From;
use std::fs;
use std::io::{Error, Write};
@ -868,6 +883,13 @@ use orgize::{Element, Org};
The Exporter is controlled by passing in a struct with (currently) just one thing inside of it, a map of IDs -> public URLs to rewrite them in to.[fn:1:I'll still need to add a way to rewrite missing links in to 404/stub pages but for now they are just left as-is, but this is fine] This thing is a boilerplate =pyo3= class with a constructor attached to it.
The Exporter is controlled by passing in a structure with a few configuration options:
- =link_retargets= maps org IDs to public URLs to rewrite them for the web
- =ignore_tags= is a list of tags which will cause the exporter to not include that heading or any of its children in the final document
- =limit_headings= is a set of org IDs; if this is not empty, the Exporter will *only* export these headings. This will be called "subheading mode"
- =include_subheadings= will instruct subheading mode to also export child headings underneath the ones indicated by =limit_headings=. One hopes the interaction of these two options in the code below will make the semantics clear.
#+begin_src rust
#[derive(Default, Debug, Clone)]
#[pyclass(dict)]
@ -875,14 +897,32 @@ pub struct ExportOptions {
/// id:{the_id} -> URL rewrites
#[pyo3(get)]
pub link_retargets: HashMap<String, String>,
#[pyo3(get)]
pub ignore_tags: HashSet<String>,
#[pyo3(get)]
pub limit_headings: HashSet<String>,
#[pyo3(get)]
pub include_subheadings: bool,
}
#[pymethods]
impl ExportOptions {
#[new]
fn new(link_retargets: HashMap<String, String>) -> Self {
fn new(
link_retargets: HashMap<String, String>,
ignore_tags: Vec<String>,
limit_headings: Vec<String>,
include_subheadings: Option<bool>,
) -> Self {
let mut lh2 = HashSet::new();
lh2.extend(limit_headings);
let mut tags = HashSet::new();
tags.extend(ignore_tags);
ExportOptions {
link_retargets,
limit_headings: lh2,
ignore_tags: tags,
include_subheadings: include_subheadings.unwrap_or(false),
..Default::default()
}
}
@ -908,7 +948,9 @@ pub struct ArroyoHtmlHandler<E: From<Error>, H: HtmlHandler<E>> {
/// handler error type
pub error_type: PhantomData<E>,
/// file-property drawer state tracking
in_drawer: bool,
current_drawer: Option<String>,
in_public_heading: bool,
heading_breadcrumbs: Vec<String>,
}
impl<E: From<Error>, H: HtmlHandler<E>> ArroyoHtmlHandler<E, H> {
@ -933,7 +975,10 @@ impl<E: From<Error>, H: HtmlHandler<E>> Default for ArroyoHtmlHandler<E, H> {
ArroyoHtmlHandler {
inner: H::default(),
error_type: PhantomData,
in_drawer: false,
current_drawer: None,
in_public_heading: false,
heading_breadcrumbs: vec![],
options: ExportOptions::default(),
}
}
@ -947,17 +992,74 @@ impl<E: From<Error>, H: HtmlHandler<E>> Default for ArroyoHtmlHandler<E, H> {
#+begin_src rust
impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for ArroyoHtmlHandler<E, H> {
fn start<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), E> {
if self.in_drawer {
return Ok(());
}
match &self.current_drawer {
None => {}
Some(drawer_name) => {
if vec![
String::from("PROPERTIES"),
String::from("REVIEW_DATA"),
String::from("LOGBOOK"),
]
.contains(&drawer_name)
{
return Ok(());
}
}
};
// if !self.in_public_heading {
// return Ok(());
// }
match element {
#+end_src
#+begin_src rust
Element::Title(title) => {
// title.tags
// let has_ignore_tag = title
// .tags
// .clone()
// .into_iter()
// .map(String::from)
// .find(|v| self.options.ignore_tags.contains(v))
// .is_some();
// if has_ignore_tag
let properties = title.properties.clone().into_hash_map();
let our_new_id = properties.get("ID");
let our_level = title.level;
self.heading_breadcrumbs.truncate(our_level);
match our_new_id.clone() {
None => {}
Some(id) => {
self.heading_breadcrumbs.push(id.to_string());
}
}
// dbg!(&self.heading_breadcrumbs);
// dbg!(&self.options.limit_headings);
let breadcrumb_set =
HashSet::from_iter(self.heading_breadcrumbs.clone().into_iter());
self.in_public_heading = self
.options
.limit_headings
.intersection(&breadcrumb_set)
.count()
!= 0;
// dbg!(self.in_public_heading);
if self.in_public_heading {
self.inner.start(w, &element)?
}
}
#+end_src
Because =orgize= doesn't parse the file-level =PROPERTIES= drawer, they're elided from the export. [[id:2e31b385-a003-4369-a136-c6b78c0917e1][org-fc]] state drawers are, too.
#+begin_src rust
Element::Drawer(drawer) => {
self.in_drawer = drawer.name == "PROPERTIES" || drawer.name == "REVIEW_DATA"
self.current_drawer = Some(drawer.name.to_string());
}
#+end_src
@ -973,11 +1075,14 @@ Text parsing is a bit weird to handle rewriting [[id:2e31b385-a003-4369-a136-c6b
",
)
.unwrap();
let after = re.replace_all(before, "<span class='fc-cloze' title='$2'>$1</span>");
if after.eq(before) {
self.inner.start(w, &Element::Text { value: after })?
} else {
write!(w, "{}", after)?
if self.in_public_heading {
let after =
re.replace_all(before, "<span class='fc-cloze' title='$2'>$1</span>");
if after.eq(before) {
self.inner.start(w, &Element::Text { value: after })?
} else {
write!(w, "{}", after)?
}
}
}
#+end_src
@ -992,20 +1097,23 @@ Link exporting is going to be the most complicated part of this because it does
None => ("", string_path),
};
let desc = link.desc.clone().unwrap_or(link.path.clone());
match proto {
"id" => write!(
w,
"<a href=\"{}\">{}</a>",
self.rewrite_link_from(&stripped_dest),
HtmlEscape(&desc),
)?,
"roam" => write!(
w,
"<a href=\"/404?key={}\">{}</a>",
HtmlEscape(&link.path),
HtmlEscape(&desc),
)?,
_ => self.inner.start(w, &Element::Link(link.clone()))?,
if self.in_public_heading {
match proto {
"id" => write!(
w,
"<a href=\"{}\">{}</a>",
self.rewrite_link_from(&stripped_dest),
HtmlEscape(&desc),
)?,
"roam" => write!(
w,
"<a href=\"/404?key={}\">{}</a>",
HtmlEscape(&link.path),
HtmlEscape(&desc),
)?,
_ => self.inner.start(w, &Element::Link(link.clone()))?,
}
}
}
#+end_src
@ -1013,7 +1121,11 @@ Link exporting is going to be the most complicated part of this because it does
Everything else is passed along to Syntect or the default HTML Handler.
#+begin_src rust
_ => self.inner.start(w, element)?,
_ => {
if self.in_public_heading {
self.inner.start(w, element)?
}
}
}
Ok(())
}
@ -1022,10 +1134,13 @@ Everything else is passed along to Syntect or the default HTML Handler.
match element {
// reset the drawer state tracking
Element::Drawer(_drawer) => {
self.in_drawer = false;
self.current_drawer = None;
}
_ => {
if self.in_drawer {
if !self.in_public_heading {
return Ok(());
}
if self.current_drawer.is_some() {
return Ok(());
}
self.inner.end(w, element)?
@ -1096,7 +1211,10 @@ For now maybe it is easier to assume that the headings are all in one file; that
the primary tension of the arroyo library now is that its design context is only in the realm of the arcology project's design goals. I need to start deciding whether a design goal of this library is to support non-arcology document systems. surely interoperable but different document systems could be built on top of arroyo
** First Pass
** CANCELLED First Pass
:LOGBOOK:
- State "CANCELLED" from [2024-02-04 Sun 16:02]
:END:
so the first pass of this API could take a file path, extract the feed metadata from keywords and heading properties; it could construct an entire atom feed, falling back to the custom HTML exporter to fill out the feed with text content. That's probably fine, and an API that other document servers could work with.
@ -1586,13 +1704,27 @@ impl Write for &mut InternalWriter {
}
#+end_src
** About the First Pass
I hate that code. It was worth a try, but it's not good, it's super janky. I'm going to add sub-heading support and compose the feeds in the Django side. this API is cleaner but with a different separation of concerns[citation needed]. This can be done by just adding an ExportOption and struct state variable tracking whether the parser has reached a heading it should be exporting.
The Exporter design model is fine, the whole thing where you can nest them. but the code, my rust ability, and the structure of the element iterator in the orgize library make it sort of bodgy and difficult to understand or change, even though there is a literate discussion surrounding it. a subheading export API can be unit tested in ways the exporter cannot.
so the second pass:
** Second API
there's a step further on, where an API takes a list of headings and feed metadata, and it parses each heading and its subheadings to HTML, *which is an API I already want to provide to document systems*. it could take arbitrary document headings provided through the public interface, and construct multi-page feeds.
there's another option, where an API takes a list of headings and feed metadata, and it parses each heading and its subheadings to HTML. *this is an API I already want to provide to document systems*, and should be written. it could take arbitrary document headings provided through the public interface, and construct multi-page feeds.
this requires the ability to export only a given subheading, which I could implement maybe more simply than the mess I wrote in the first pass.
or we could just clobber together a version of [[https://github.com/tanrax/RSSingle][RSSingle]]; [[id:personal_software_can_be_shitty][Personal Software Can Be Shitty]].
This API could be memoized in the python side with functools.cache so that the headings could be exported.
this would allow me to microblog from my Journal, by allowing feeds to contain headings from arbitrary pages. this is Good. so let's do that.
** Hacky solution
we could just clobber together a version of [[https://github.com/tanrax/RSSingle][RSSingle]]; [[id:personal_software_can_be_shitty][Personal Software Can Be Shitty]].
** Future API
@ -1610,7 +1742,7 @@ use pyo3::prelude::*;
pub mod parse;
pub mod export_html;
pub mod export_atom;
// pub mod export_atom;
pub mod types;
#[pymodule]
@ -1625,10 +1757,10 @@ fn arroyo_rs(py: Python, m: &PyModule) -> PyResult<()> {
Ok(export_html::htmlize_file(path, options)?)
}
#[pyfn(m)]
fn atomize_file(path: String, options: export_html::ExportOptions) -> PyResult<String> {
Ok(export_atom::atomize_file(path, options)?)
}
// #[pyfn(m)]
// fn atomize_file(path: String, options: export_html::ExportOptions) -> PyResult<String> {
// Ok(export_atom::atomize_file(path, options)?)
// }
m.add_class::<types::Document>()?;
m.add_class::<types::Heading>()?;
@ -1786,7 +1918,7 @@ Stub package interface
#+begin_src python :tangle arroyo/__init__.py :mkdirp yes
from .arroyo_rs import parse_file, InvalidDocError
from .arroyo_rs import atomize_file, htmlize_file, ExportOptions
from .arroyo_rs import htmlize_file, ExportOptions
#+end_src
** Click command wrapper
@ -1803,9 +1935,10 @@ This is a stub, this module probably doesn't need to be runnable.
import os
import click
import glob
from typing import Optional
# from . import persist_one_file
from .arroyo_rs import atomize_file, htmlize_file, ExportOptions
from .arroyo_rs import htmlize_file, ExportOptions
# from . import models
# from sqlmodel import Session
#+end_src
@ -1843,21 +1976,17 @@ def generate_db(source, dest, file_glob):
@cli.command()
@click.option("--file", "-f", help="The file to export")
def export_document(file):
@click.option("--limit-headings", "-H", multiple=True, help="org ID to export")
@click.option("--include-subheadings", "-I", help="when headings are specified, this will control whether to export child headings")
def export_document(file, limit_headings: Optional[set] = None, include_subheadings=False):
# in The Real World this is loaded from DB and generated.
options = ExportOptions(
link_retargets = {"currently_reading": "https://rix.si/hello-world"}
link_retargets = {"currently_reading": "https://rix.si/hello-world"},
limit_headings = limit_headings,
include_subheadings = include_subheadings,
ignore_tags = [],
)
print(htmlize_file(file, options))
@cli.command()
@click.option("--file", "-f", help="The file to export")
def atomize_document(file):
# in The Real World this is loaded from DB and generated.
options = ExportOptions(
link_retargets = {"currently_reading": "https://rix.si/hello-world"}
)
print(atomize_file(file, options))
#+end_src
#+begin_src python

View File

@ -1,4 +1,4 @@
# [[file:../arroyo-native-parser.org::*Python Package][Python Package:1]]
from .arroyo_rs import parse_file, InvalidDocError
from .arroyo_rs import atomize_file, htmlize_file, ExportOptions
from .arroyo_rs import htmlize_file, ExportOptions
# Python Package:1 ends here

View File

@ -2,9 +2,10 @@
import os
import click
import glob
from typing import Optional
# from . import persist_one_file
from .arroyo_rs import atomize_file, htmlize_file, ExportOptions
from .arroyo_rs import htmlize_file, ExportOptions
# from . import models
# from sqlmodel import Session
# Click command wrapper:1 ends here
@ -40,21 +41,17 @@ def generate_db(source, dest, file_glob):
@cli.command()
@click.option("--file", "-f", help="The file to export")
def export_document(file):
@click.option("--limit-headings", "-H", multiple=True, help="org ID to export")
@click.option("--include-subheadings", "-I", help="when headings are specified, this will control whether to export child headings")
def export_document(file, limit_headings: Optional[set] = None, include_subheadings=False):
# in The Real World this is loaded from DB and generated.
options = ExportOptions(
link_retargets = {"currently_reading": "https://rix.si/hello-world"}
link_retargets = {"currently_reading": "https://rix.si/hello-world"},
limit_headings = limit_headings,
include_subheadings = include_subheadings,
ignore_tags = [],
)
print(htmlize_file(file, options))
@cli.command()
@click.option("--file", "-f", help="The file to export")
def atomize_document(file):
# in The Real World this is loaded from DB and generated.
options = ExportOptions(
link_retargets = {"currently_reading": "https://rix.si/hello-world"}
)
print(atomize_file(file, options))
# Click command wrapper:2 ends here
# [[file:../arroyo-native-parser.org::*Click command wrapper][Click command wrapper:3]]

View File

@ -1,4 +1,4 @@
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:1]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:1]]
use anyhow::Result;
use regex;
use std::borrow::Cow;
@ -11,9 +11,9 @@ use orgize::{elements, Element, Org};
use crate::export_html::ArroyoHtmlHandler;
use crate::export_html::ExportOptions;
// First Pass:1 ends here
// CANCELLED First Pass:1 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:2]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:2]]
pub struct ArroyoAtomHandler<E: From<Error>, H: HtmlHandler<E>> {
pub options: ExportOptions,
pub inner: ArroyoHtmlHandler<E, H>,
@ -61,9 +61,9 @@ impl<E: From<Error>, H: HtmlHandler<E>> Default for ArroyoAtomHandler<E, H> {
}
}
}
// First Pass:2 ends here
// CANCELLED First Pass:2 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:3]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:3]]
pub fn atomize_file(path: String, options: ExportOptions) -> Result<String> {
let syntect_handler = SyntectHtmlHandler::new(DefaultHtmlHandler);
let html_handler = ArroyoHtmlHandler::new(options.clone(), syntect_handler);
@ -90,74 +90,74 @@ pub fn atomize_file(path: String, options: ExportOptions) -> Result<String> {
org_tree.write_html_custom(&mut vec, &mut handler)?;
Ok(String::from_utf8(vec)?)
}
// First Pass:3 ends here
// CANCELLED First Pass:3 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:4]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:4]]
impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for ArroyoAtomHandler<E, H> {
fn start<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), E> {
(match element {
// First Pass:4 ends here
// CANCELLED First Pass:4 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:5]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:5]]
Element::Document { .. } => self.start_document(w, element),
// First Pass:5 ends here
// CANCELLED First Pass:5 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:6]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:6]]
Element::Keyword(kw) => self.start_keyword(w, kw),
// First Pass:6 ends here
// CANCELLED First Pass:6 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:7]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:7]]
Element::Title(title) => self.start_title(w, title),
// First Pass:7 ends here
// CANCELLED First Pass:7 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:8]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:8]]
Element::Drawer(drawer) => {
self.in_drawer = drawer.name == "PROPERTIES" && self.feed_page_id.eq("");
self.start_rest(w, element)
}
Element::Text { value } => self.start_text(w, value),
// First Pass:8 ends here
// CANCELLED First Pass:8 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:9]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:9]]
_t => self.start_rest(w, element),
})
.unwrap(); // if we can't parse something, just fucken panic.
// First Pass:9 ends here
// CANCELLED First Pass:9 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:10]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:10]]
Ok(())
}
fn end<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), E> {
(match element {
// First Pass:10 ends here
// CANCELLED First Pass:10 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:11]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:11]]
// Element::Title(_title) => {}
Element::Document { .. } => self.end_document(w, element),
// First Pass:11 ends here
// CANCELLED First Pass:11 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:12]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:12]]
Element::Drawer(drawer) => {
self.in_drawer = false;
self.end_rest(w, element)
}
// First Pass:12 ends here
// CANCELLED First Pass:12 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:13]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:13]]
_ => self.end_rest(w, element),
})
.ok();
Ok(())
}
}
// First Pass:13 ends here
// CANCELLED First Pass:13 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:14]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:14]]
impl<E: From<Error>, H: HtmlHandler<E>> ArroyoAtomHandler<E, H> {
// First Pass:14 ends here
// CANCELLED First Pass:14 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:15]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:15]]
fn start_document<W: Write>(&mut self, mut w: W, _document: &elements::Element) -> Result<()> {
Ok(write!(
w,
@ -178,9 +178,9 @@ impl<E: From<Error>, H: HtmlHandler<E>> ArroyoAtomHandler<E, H> {
)?;
Ok(())
}
// First Pass:15 ends here
// CANCELLED First Pass:15 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:16]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:16]]
fn start_title<W: Write>(&mut self, mut w: W, title: &elements::Title) -> Result<()> {
let ignore_tags = vec![
String::from("noexport"),
@ -254,9 +254,9 @@ impl<E: From<Error>, H: HtmlHandler<E>> ArroyoAtomHandler<E, H> {
Ok(())
}
}
// First Pass:16 ends here
// CANCELLED First Pass:16 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:17]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:17]]
fn start_keyword<W: Write>(&mut self, mut w: W, kw: &elements::Keyword) -> Result<()> {
// dbg!(kw);
match kw.key.as_ref() {
@ -292,9 +292,9 @@ impl<E: From<Error>, H: HtmlHandler<E>> ArroyoAtomHandler<E, H> {
}
Ok(())
}
// First Pass:17 ends here
// CANCELLED First Pass:17 ends here
// [[file:../arroyo-native-parser.org::*First Pass][First Pass:18]]
// [[file:../arroyo-native-parser.org::*CANCELLED First Pass][CANCELLED First Pass:18]]
fn start_text<W: Write>(&mut self, mut w: W, text: &Cow<str>) -> Result<()> {
if self.in_drawer == true {
let (_, prop_drawer): (_, orgize::elements::PropertiesMap) =
@ -346,7 +346,7 @@ impl<E: From<Error>, H: HtmlHandler<E>> ArroyoAtomHandler<E, H> {
Ok(())
}
}
// First Pass:18 ends here
// CANCELLED First Pass:18 ends here
// [[file:../arroyo-native-parser.org::*Strip Links from Strings][Strip Links from Strings:1]]
fn strip_links_from_str(in_str: &str) -> Result<String> {

View File

@ -1,7 +1,7 @@
// [[file:../arroyo-native-parser.org::*The HTML exporter][The HTML exporter:1]]
// [[file:../arroyo-native-parser.org::*The Arroyo HTML exporter][The Arroyo HTML exporter:1]]
use anyhow::Result;
use pyo3::prelude::*;
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::convert::From;
use std::fs;
use std::io::{Error, Write};
@ -11,23 +11,41 @@ use regex::Regex;
use orgize::export::{DefaultHtmlHandler, HtmlEscape, HtmlHandler, SyntectHtmlHandler};
use orgize::{Element, Org};
// The HTML exporter:1 ends here
// The Arroyo HTML exporter:1 ends here
// [[file:../arroyo-native-parser.org::*The HTML exporter][The HTML exporter:2]]
// [[file:../arroyo-native-parser.org::*The Arroyo HTML exporter][The Arroyo HTML exporter:2]]
#[derive(Default, Debug, Clone)]
#[pyclass(dict)]
pub struct ExportOptions {
/// id:{the_id} -> URL rewrites
#[pyo3(get)]
pub link_retargets: HashMap<String, String>,
#[pyo3(get)]
pub ignore_tags: HashSet<String>,
#[pyo3(get)]
pub limit_headings: HashSet<String>,
#[pyo3(get)]
pub include_subheadings: bool,
}
#[pymethods]
impl ExportOptions {
#[new]
fn new(link_retargets: HashMap<String, String>) -> Self {
fn new(
link_retargets: HashMap<String, String>,
ignore_tags: Vec<String>,
limit_headings: Vec<String>,
include_subheadings: Option<bool>,
) -> Self {
let mut lh2 = HashSet::new();
lh2.extend(limit_headings);
let mut tags = HashSet::new();
tags.extend(ignore_tags);
ExportOptions {
link_retargets,
limit_headings: lh2,
ignore_tags: tags,
include_subheadings: include_subheadings.unwrap_or(false),
..Default::default()
}
}
@ -41,9 +59,9 @@ impl ExportOptions {
// Self::__repr__(slf)
// }
}
// The HTML exporter:2 ends here
// The Arroyo HTML exporter:2 ends here
// [[file:../arroyo-native-parser.org::*The HTML exporter][The HTML exporter:3]]
// [[file:../arroyo-native-parser.org::*The Arroyo HTML exporter][The Arroyo HTML exporter:3]]
pub struct ArroyoHtmlHandler<E: From<Error>, H: HtmlHandler<E>> {
pub options: ExportOptions,
/// inner html handler
@ -51,7 +69,9 @@ pub struct ArroyoHtmlHandler<E: From<Error>, H: HtmlHandler<E>> {
/// handler error type
pub error_type: PhantomData<E>,
/// file-property drawer state tracking
in_drawer: bool,
current_drawer: Option<String>,
in_public_heading: bool,
heading_breadcrumbs: Vec<String>,
}
impl<E: From<Error>, H: HtmlHandler<E>> ArroyoHtmlHandler<E, H> {
@ -76,29 +96,88 @@ impl<E: From<Error>, H: HtmlHandler<E>> Default for ArroyoHtmlHandler<E, H> {
ArroyoHtmlHandler {
inner: H::default(),
error_type: PhantomData,
in_drawer: false,
current_drawer: None,
in_public_heading: false,
heading_breadcrumbs: vec![],
options: ExportOptions::default(),
}
}
}
// The HTML exporter:3 ends here
// The Arroyo HTML exporter:3 ends here
// [[file:../arroyo-native-parser.org::*The Custom HTML Exporter Extensions][The Custom HTML Exporter Extensions:1]]
impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for ArroyoHtmlHandler<E, H> {
fn start<W: Write>(&mut self, mut w: W, element: &Element) -> Result<(), E> {
if self.in_drawer {
return Ok(());
}
match &self.current_drawer {
None => {}
Some(drawer_name) => {
if vec![
String::from("PROPERTIES"),
String::from("REVIEW_DATA"),
String::from("LOGBOOK"),
]
.contains(&drawer_name)
{
return Ok(());
}
}
};
// if !self.in_public_heading {
// return Ok(());
// }
match element {
// The Custom HTML Exporter Extensions:1 ends here
// [[file:../arroyo-native-parser.org::*The Custom HTML Exporter Extensions][The Custom HTML Exporter Extensions:2]]
Element::Drawer(drawer) => {
self.in_drawer = drawer.name == "PROPERTIES" || drawer.name == "REVIEW_DATA"
Element::Title(title) => {
// title.tags
// let has_ignore_tag = title
// .tags
// .clone()
// .into_iter()
// .map(String::from)
// .find(|v| self.options.ignore_tags.contains(v))
// .is_some();
// if has_ignore_tag
let properties = title.properties.clone().into_hash_map();
let our_new_id = properties.get("ID");
let our_level = title.level;
self.heading_breadcrumbs.truncate(our_level);
match our_new_id.clone() {
None => {}
Some(id) => {
self.heading_breadcrumbs.push(id.to_string());
}
}
// dbg!(&self.heading_breadcrumbs);
// dbg!(&self.options.limit_headings);
let breadcrumb_set =
HashSet::from_iter(self.heading_breadcrumbs.clone().into_iter());
self.in_public_heading = self
.options
.limit_headings
.intersection(&breadcrumb_set)
.count()
!= 0;
// dbg!(self.in_public_heading);
if self.in_public_heading {
self.inner.start(w, &element)?
}
}
// The Custom HTML Exporter Extensions:2 ends here
// [[file:../arroyo-native-parser.org::*The Custom HTML Exporter Extensions][The Custom HTML Exporter Extensions:3]]
Element::Drawer(drawer) => {
self.current_drawer = Some(drawer.name.to_string());
}
// The Custom HTML Exporter Extensions:3 ends here
// [[file:../arroyo-native-parser.org::*The Custom HTML Exporter Extensions][The Custom HTML Exporter Extensions:4]]
Element::Text { value: before } => {
let re = Regex::new(
r"(?x)
@ -108,16 +187,19 @@ impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for ArroyoHtmlHandler<E,
",
)
.unwrap();
let after = re.replace_all(before, "<span class='fc-cloze' title='$2'>$1</span>");
if after.eq(before) {
self.inner.start(w, &Element::Text { value: after })?
} else {
write!(w, "{}", after)?
if self.in_public_heading {
let after =
re.replace_all(before, "<span class='fc-cloze' title='$2'>$1</span>");
if after.eq(before) {
self.inner.start(w, &Element::Text { value: after })?
} else {
write!(w, "{}", after)?
}
}
}
// The Custom HTML Exporter Extensions:3 ends here
// The Custom HTML Exporter Extensions:4 ends here
// [[file:../arroyo-native-parser.org::*The Custom HTML Exporter Extensions][The Custom HTML Exporter Extensions:4]]
// [[file:../arroyo-native-parser.org::*The Custom HTML Exporter Extensions][The Custom HTML Exporter Extensions:5]]
Element::Link(link) => {
let string_path = link.path.to_string();
let (proto, stripped_dest) = match string_path.split_once(':') {
@ -125,26 +207,33 @@ impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for ArroyoHtmlHandler<E,
None => ("", string_path),
};
let desc = link.desc.clone().unwrap_or(link.path.clone());
match proto {
"id" => write!(
w,
"<a href=\"{}\">{}</a>",
self.rewrite_link_from(&stripped_dest),
HtmlEscape(&desc),
)?,
"roam" => write!(
w,
"<a href=\"/404?key={}\">{}</a>",
HtmlEscape(&link.path),
HtmlEscape(&desc),
)?,
_ => self.inner.start(w, &Element::Link(link.clone()))?,
if self.in_public_heading {
match proto {
"id" => write!(
w,
"<a href=\"{}\">{}</a>",
self.rewrite_link_from(&stripped_dest),
HtmlEscape(&desc),
)?,
"roam" => write!(
w,
"<a href=\"/404?key={}\">{}</a>",
HtmlEscape(&link.path),
HtmlEscape(&desc),
)?,
_ => self.inner.start(w, &Element::Link(link.clone()))?,
}
}
}
// The Custom HTML Exporter Extensions:4 ends here
// The Custom HTML Exporter Extensions:5 ends here
// [[file:../arroyo-native-parser.org::*The Custom HTML Exporter Extensions][The Custom HTML Exporter Extensions:5]]
_ => self.inner.start(w, element)?,
// [[file:../arroyo-native-parser.org::*The Custom HTML Exporter Extensions][The Custom HTML Exporter Extensions:6]]
_ => {
if self.in_public_heading {
self.inner.start(w, element)?
}
}
}
Ok(())
}
@ -153,10 +242,13 @@ impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for ArroyoHtmlHandler<E,
match element {
// reset the drawer state tracking
Element::Drawer(_drawer) => {
self.in_drawer = false;
self.current_drawer = None;
}
_ => {
if self.in_drawer {
if !self.in_public_heading {
return Ok(());
}
if self.current_drawer.is_some() {
return Ok(());
}
self.inner.end(w, element)?
@ -165,9 +257,9 @@ impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for ArroyoHtmlHandler<E,
Ok(())
}
}
// The Custom HTML Exporter Extensions:5 ends here
// The Custom HTML Exporter Extensions:6 ends here
// [[file:../arroyo-native-parser.org::*The Public Interface][The Public Interface:1]]
// [[file:../arroyo-native-parser.org::*The API Interface][The API Interface:1]]
// Sure would be nice... some day I'll understand lifetimes enough
// to write a function that goes path -> orgize::Org
// use crate::parse::orgize_document;
@ -195,4 +287,4 @@ pub fn htmlize_file(path: String, options: ExportOptions) -> Result<String> {
org_tree.write_html_custom(&mut vec, &mut handler)?;
Ok(String::from_utf8(vec)?)
}
// The Public Interface:1 ends here
// The API Interface:1 ends here

View File

@ -1,9 +1,9 @@
// [[file:../arroyo-native-parser.org::*Library definition and exports for the Python library][Library definition and exports for the Python library:1]]
// [[file:../arroyo-native-parser.org::*Library definition and exports for the native Python library][Library definition and exports for the native Python library:1]]
use pyo3::prelude::*;
pub mod parse;
pub mod export_html;
pub mod export_atom;
// pub mod export_atom;
pub mod types;
#[pymodule]
@ -18,10 +18,10 @@ fn arroyo_rs(py: Python, m: &PyModule) -> PyResult<()> {
Ok(export_html::htmlize_file(path, options)?)
}
#[pyfn(m)]
fn atomize_file(path: String, options: export_html::ExportOptions) -> PyResult<String> {
Ok(export_atom::atomize_file(path, options)?)
}
// #[pyfn(m)]
// fn atomize_file(path: String, options: export_html::ExportOptions) -> PyResult<String> {
// Ok(export_atom::atomize_file(path, options)?)
// }
m.add_class::<types::Document>()?;
m.add_class::<types::Heading>()?;
@ -33,4 +33,4 @@ fn arroyo_rs(py: Python, m: &PyModule) -> PyResult<()> {
Ok(())
}
// Library definition and exports for the Python library:1 ends here
// Library definition and exports for the native Python library:1 ends here

View File

@ -1,4 +1,4 @@
// [[file:../arroyo-native-parser.org::*The Parser][The Parser:1]]
// [[file:../arroyo-native-parser.org::*The Arroyo Org Parser][The Arroyo Org Parser:1]]
use anyhow::Result;
use itertools::Itertools;
use lexpr;
@ -10,7 +10,7 @@ use std::{error::Error, fs};
// use std::collections::HashMap;
use crate::types::{Document, Heading, InvalidDocError, Keyword, Link};
// The Parser:1 ends here
// The Arroyo Org Parser:1 ends here
// [[file:../arroyo-native-parser.org::*The public interface][The public interface:1]]
pub fn parse_document(path: String) -> Result<Document> {
@ -19,10 +19,16 @@ pub fn parse_document(path: String) -> Result<Document> {
&org,
&orgize::ParseConfig {
// Need to pull these from environment or options...
todo_keywords: (vec!["NEXT".to_string(), "INPROGRESS".to_string(), "WAITING".to_string()],
vec!["DONE".to_string(), "CANCELLED".to_string()]),
todo_keywords: (
vec![
"NEXT".to_string(),
"INPROGRESS".to_string(),
"WAITING".to_string(),
],
vec!["DONE".to_string(), "CANCELLED".to_string()],
),
..Default::default()
}
},
);
let keywords = extract_metadata(path.clone(), org_tree)?;
let headings = extract_headings(path.clone(), org_tree)?;
@ -53,192 +59,197 @@ pub fn extract_headings(path: String, tree: &Org) -> Result<Vec<Heading>> {
// Extracting Arroyo Headings:1 ends here
// [[file:../arroyo-native-parser.org::*Extracting Arroyo Headings][Extracting Arroyo Headings:2]]
let mut in_drawer: bool = false;
let mut id_crumbs: Vec<Option<String>> = Vec::new();
let mut cur_id: Option<String> = None;
let mut cur_level: usize = 0;
let mut headings: Vec<Heading> = Vec::new();
headings.push(Heading::default());
let mut links: HashMap<String, Vec<Link>> = HashMap::new();
let mut inherited_tags: Vec<Vec<String>> = Vec::new();
let mut in_drawer: bool = false;
let mut id_crumbs: Vec<Option<String>> = Vec::new();
let mut cur_id: Option<String> = None;
let mut cur_level: usize = 0;
let mut headings: Vec<Heading> = Vec::new();
headings.push(Heading::default());
let mut links: HashMap<String, Vec<Link>> = HashMap::new();
let mut inherited_tags: Vec<Vec<String>> = Vec::new();
// Extracting Arroyo Headings:2 ends here
// [[file:../arroyo-native-parser.org::*Extracting Arroyo Headings][Extracting Arroyo Headings:3]]
// file level metadata + filetags
let file_metadata = extract_metadata(path.clone(), tree)?;
let filetags = match file_metadata
.iter()
.find(|kw| kw.keyword.to_lowercase() == "filetags")
{
Some(kw) => kw
.value
.split(':')
.map(|s| s.to_string())
.filter(|s| !s.is_empty())
.collect(),
_ => Vec::<String>::new(),
};
headings[0].tags = Some(filetags.clone());
// file level metadata + filetags
let file_metadata = extract_metadata(path.clone(), tree)?;
let filetags = match file_metadata
.iter()
.find(|kw| kw.keyword.to_lowercase() == "filetags")
{
Some(kw) => kw
.value
.split(':')
.map(|s| s.to_string())
.filter(|s| !s.is_empty())
.collect(),
_ => Vec::<String>::new(),
};
headings[0].tags = Some(filetags.clone());
// Extracting Arroyo Headings:3 ends here
// [[file:../arroyo-native-parser.org::*Extracting Arroyo Headings][Extracting Arroyo Headings:4]]
// Extract document title and apply to level 0 heading
let doc_title = match file_metadata
.iter()
.find(|kw| kw.keyword.to_lowercase() == "title")
{
Some(kw) => kw.value.clone(),
_ => String::from(""),
};
headings[0].text = doc_title;
// Extract document title and apply to level 0 heading
let doc_title = match file_metadata
.iter()
.find(|kw| kw.keyword.to_lowercase() == "title")
{
Some(kw) => kw.value.clone(),
_ => String::from(""),
};
headings[0].text = doc_title;
// Extracting Arroyo Headings:4 ends here
// [[file:../arroyo-native-parser.org::*Extracting Arroyo Headings][Extracting Arroyo Headings:5]]
// state machine go brrr
tree.iter()
.map(|event| {
match event {
// state machine go brrr
tree.iter()
.map(|event| {
match event {
// Extracting Arroyo Headings:5 ends here
// [[file:../arroyo-native-parser.org::*Heading parser][Heading parser:1]]
Event::Start(orgize::Element::Title(title)) => {
let properties = title.properties.clone().into_hash_map();
cur_id = properties.get("ID").map(|id| id.clone().into());
Event::Start(orgize::Element::Title(title)) => {
let tmp_properties = title.properties.clone().into_hash_map();
let mut export_properties: HashMap<String, String> = HashMap::new();
tmp_properties.iter().for_each(|(k, v)| {
export_properties.insert(k.to_string(), v.to_string());
});
cur_id = export_properties.get("ID").cloned();
id_crumbs.truncate(cur_level + 1);
id_crumbs.push(cur_id.clone());
id_crumbs.truncate(cur_level + 1);
id_crumbs.push(cur_id.clone());
let refs = properties
.get("ROAM_REFS")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
let aliases = properties
.get("ROAM_ALIASES")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
cur_level = title.level;
let refs = export_properties
.get("ROAM_REFS")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
let aliases = export_properties
.get("ROAM_ALIASES")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
cur_level = title.level;
// reset the tags table
inherited_tags.truncate(cur_level - 1);
let new_tags: Vec<String> = title
.tags
.iter()
.map(|mbox| mbox.clone().to_string())
.collect();
inherited_tags.push(new_tags);
// reset the tags table
inherited_tags.truncate(cur_level - 1);
let new_tags: Vec<String> = title
.tags
.iter()
.map(|mbox| mbox.clone().to_string())
.collect();
inherited_tags.push(new_tags);
let most_tags = inherited_tags.concat();
let all_tags: Vec<String> = [filetags.clone(), most_tags].concat();
let most_tags = inherited_tags.concat();
let all_tags: Vec<String> = [filetags.clone(), most_tags].concat();
let h = Heading {
id: cur_id.clone(),
level: cur_level,
text: title.raw.to_string(),
tags: match all_tags.len() {
0 => None,
_ => Some(all_tags),
},
refs,
aliases,
..Default::default()
};
headings.push(h);
Ok(())
}
let h = Heading {
id: cur_id.clone(),
level: cur_level,
text: title.raw.to_string(),
tags: match all_tags.len() {
0 => None,
_ => Some(all_tags),
},
properties: export_properties,
refs,
aliases,
..Default::default()
};
headings.push(h);
Ok(())
}
// Heading parser:1 ends here
// [[file:../arroyo-native-parser.org::*File-level Property Drawer parsing][File-level Property Drawer parsing:1]]
Event::Start(orgize::Element::Drawer(drawer)) => {
in_drawer = drawer.name == "PROPERTIES" && headings[0].id.is_none();
Ok(())
}
Event::Start(orgize::Element::Drawer(drawer)) => {
in_drawer = drawer.name == "PROPERTIES" && headings[0].id.is_none();
Ok(())
}
// File-level Property Drawer parsing:1 ends here
// [[file:../arroyo-native-parser.org::*File-level Property Drawer parsing][File-level Property Drawer parsing:2]]
Event::Start(orgize::Element::Text { value }) => {
if in_drawer {
// this is where we rely on forked orgize
let (_, prop_drawer): (_, orgize::elements::PropertiesMap) =
orgize::elements::Drawer::parse_drawer_content(value)
.expect("failed to parse properties drawer");
let properties = prop_drawer.into_hash_map();
Event::Start(orgize::Element::Text { value }) => {
if in_drawer {
// this is where we rely on forked orgize
let (_, prop_drawer): (_, orgize::elements::PropertiesMap) =
orgize::elements::Drawer::parse_drawer_content(value)
.expect("failed to parse properties drawer");
let properties = prop_drawer.into_hash_map();
// update cur_id and heading 0 ID since this is
// implied to be the first drawer, but it's kind
// of :yikes: to think about it like that! we
                        // could be genius enough to have a floating
// PROPERTIES drawer that would muck things up
cur_id = properties.get("ID").map(|s| s.to_string());
if cur_id.is_none() {
cur_id = properties.get("CUSTOM_ID").map(|s| s.to_string())
}
// update cur_id and heading 0 ID since this is
// implied to be the first drawer, but it's kind
// of :yikes: to think about it like that! we
                            // could be genius enough to have a floating
// PROPERTIES drawer that would muck things up
cur_id = properties.get("ID").map(|s| s.to_string());
if cur_id.is_none() {
cur_id = properties.get("CUSTOM_ID").map(|s| s.to_string())
}
id_crumbs = vec![cur_id.clone()];
headings[0].id = cur_id.clone();
id_crumbs = vec![cur_id.clone()];
headings[0].id = cur_id.clone();
headings[0].aliases = properties
.get("ROAM_ALIASES")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
headings[0].refs = properties
.get("ROAM_REFS")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
}
headings[0].aliases = properties
.get("ROAM_ALIASES")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
headings[0].refs = properties
.get("ROAM_REFS")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
}
if headings[0].id.is_none() {
return Err(InvalidDocError::new_err(format!(
"Root ID is None in {}",
path
)));
}
if headings[0].id.is_none() {
return Err(InvalidDocError::new_err(format!(
"Root ID is None in {}",
path
)));
}
Ok(())
}
Ok(())
}
// File-level Property Drawer parsing:2 ends here
// [[file:../arroyo-native-parser.org::*File-level Property Drawer parsing][File-level Property Drawer parsing:3]]
Event::End(orgize::Element::Drawer(_drawer)) => {
in_drawer = false;
Ok(())
}
Event::End(orgize::Element::Drawer(_drawer)) => {
in_drawer = false;
Ok(())
}
// File-level Property Drawer parsing:3 ends here
// [[file:../arroyo-native-parser.org::*Link parsing][Link parsing:1]]
// Stash links outside the match block in a HashMap shape
// of heading id -> list of links; it would be nice if the
// match block returned an Option<Link> but that doesn't
// play well with the rest of the state machine
Event::Start(orgize::Element::Link(link)) => {
let dest = link.path.to_string();
let (proto, stripped_dest): (Option<String>, String) =
match dest.split_once(':') {
Some((proto, stripped_dest)) => {
(Some(proto.to_string()), stripped_dest.to_string())
}
None => (None, dest.clone()),
};
// Stash links outside the match block in a HashMap shape
// of heading id -> list of links; it would be nice if the
// match block returned an Option<Link> but that doesn't
// play well with the rest of the state machine
Event::Start(orgize::Element::Link(link)) => {
let dest = link.path.to_string();
let (proto, stripped_dest): (Option<String>, String) =
match dest.split_once(':') {
Some((proto, stripped_dest)) => {
(Some(proto.to_string()), stripped_dest.to_string())
}
None => (None, dest.clone()),
};
let last_non_none = match id_crumbs.iter().rev().find_map(|x| x.clone()) {
Some(last_non_none) => last_non_none,
None => {
return Err(InvalidDocError::new_err(format!(
"no non-none ID in {}",
path
)));
}
};
let last_non_none = match id_crumbs.iter().rev().find_map(|x| x.clone()) {
Some(last_non_none) => last_non_none,
None => {
return Err(InvalidDocError::new_err(format!(
"no non-none ID in {}",
path
)));
}
};
let link_list = links.entry(last_non_none.clone()).or_insert(Vec::new());
link_list.push(Link {
from_file: path.clone().to_string(),
from_id: last_non_none.clone(),
to: stripped_dest.clone(),
to_proto: proto.clone(),
text: link.desc.clone().map(String::from),
});
Ok(())
}
let link_list = links.entry(last_non_none.clone()).or_insert(Vec::new());
link_list.push(Link {
from_file: path.clone().to_string(),
from_id: last_non_none.clone(),
to: stripped_dest.clone(),
to_proto: proto.clone(),
text: link.desc.clone().map(String::from),
});
Ok(())
}
// Link parsing:1 ends here
// [[file:../arroyo-native-parser.org::*Cleaning up][Cleaning up:1]]
@ -293,7 +304,7 @@ fn split_quoted_string(quoted_str: String) -> Result<Vec<String>, Box<dyn Error>
}
// =split_quoted_string=:1 ends here
// [[file:../arroyo-native-parser.org::*Tests][Tests:1]]
// [[file:../arroyo-native-parser.org::*Code Unit Tests][Code Unit Tests:1]]
#[cfg(test)]
mod tests {
use std::assert_eq;
@ -367,4 +378,4 @@ mod tests {
);
}
}
// Tests:1 ends here
// Code Unit Tests:1 ends here

View File

@ -2,6 +2,7 @@
use pyo3::exceptions::PyException;
use pyo3::prelude::*;
use pyo3::pyclass;
use std::collections::HashMap;
use std::fmt;
@ -106,6 +107,8 @@ pub struct Heading {
#[pyo3(get)]
pub text: String,
#[pyo3(get)]
pub properties: HashMap<String, String>,
#[pyo3(get)]
pub tags: Option<Vec<String>>,
#[pyo3(get)]
pub refs: Option<Vec<String>>,
@ -120,13 +123,14 @@ impl fmt::Display for Heading {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"Heading(id={}, title={}, {} tags, {} refs, {} aliases, {} links)",
"Heading(id={}, title={}, {} tags, {} refs, {} aliases, {} links, props {:?})",
self.id.clone().unwrap_or("None".to_owned()),
self.text,
self.tags.clone().unwrap_or(vec![]).len(),
self.refs.clone().unwrap_or(vec![]).len(),
self.aliases.clone().unwrap_or(vec![]).len(),
self.links.clone().unwrap_or(vec![]).len(),
self.properties.clone(),
)
}
}

View File

@ -1,4 +1,4 @@
# [[file:../arroyo-native-parser.org::*Tests][Tests:2]]
# [[file:../arroyo-native-parser.org::*Code Unit Tests][Code Unit Tests:2]]
import arroyo.arroyo_rs
import arroyo.models
@ -41,4 +41,4 @@ def test_relationships():
# assert(headings[0].node_id == '20231023T115950.248543')
# assert(headings[1].node_id == None)
# assert(headings[1].text == "Overview")
# Tests:2 ends here
# Code Unit Tests:2 ends here