Compare commits
2 Commits
10ae6f80a9
...
52258e2db7
Author | SHA1 | Date |
---|---|---|
Ryan Rix | 52258e2db7 | |
Ryan Rix | be91137ba8 |
|
@ -229,6 +229,7 @@ Let's start by defining the types. We use =pyo3= macro annotations so that these
|
|||
use pyo3::exceptions::PyException;
|
||||
use pyo3::prelude::*;
|
||||
use pyo3::pyclass;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use std::fmt;
|
||||
|
||||
|
@ -360,6 +361,8 @@ pub struct Heading {
|
|||
#[pyo3(get)]
|
||||
pub text: String,
|
||||
#[pyo3(get)]
|
||||
pub properties: HashMap<String, String>,
|
||||
#[pyo3(get)]
|
||||
pub tags: Option<Vec<String>>,
|
||||
#[pyo3(get)]
|
||||
pub refs: Option<Vec<String>>,
|
||||
|
@ -374,13 +377,14 @@ impl fmt::Display for Heading {
|
|||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"Heading(id={}, title={}, {} tags, {} refs, {} aliases, {} links)",
|
||||
"Heading(id={}, title={}, {} tags, {} refs, {} aliases, {} links, props {:?})",
|
||||
self.id.clone().unwrap_or("None".to_owned()),
|
||||
self.text,
|
||||
self.tags.clone().unwrap_or(vec![]).len(),
|
||||
self.refs.clone().unwrap_or(vec![]).len(),
|
||||
self.aliases.clone().unwrap_or(vec![]).len(),
|
||||
self.links.clone().unwrap_or(vec![]).len(),
|
||||
self.properties.clone(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
@ -475,10 +479,16 @@ pub fn parse_document(path: String) -> Result<Document> {
|
|||
&org,
|
||||
&orgize::ParseConfig {
|
||||
// Need to pull these from environment or options...
|
||||
todo_keywords: (vec!["NEXT".to_string(), "INPROGRESS".to_string(), "WAITING".to_string()],
|
||||
vec!["DONE".to_string(), "CANCELLED".to_string()]),
|
||||
todo_keywords: (
|
||||
vec![
|
||||
"NEXT".to_string(),
|
||||
"INPROGRESS".to_string(),
|
||||
"WAITING".to_string(),
|
||||
],
|
||||
vec!["DONE".to_string(), "CANCELLED".to_string()],
|
||||
),
|
||||
..Default::default()
|
||||
}
|
||||
},
|
||||
);
|
||||
let keywords = extract_metadata(path.clone(), org_tree)?;
|
||||
let headings = extract_headings(path.clone(), org_tree)?;
|
||||
|
@ -532,57 +542,57 @@ There are some mutable variables at the top of this function which are used for
|
|||
- =inherited_tags= is a list of lists of strings; the inner vector contains the list of tags for each header, starting at level 0 for =FILETAGS= entries. Combining this structure and =cur_level= allows the parser to perform tag inheritance by flattening the list, and by dropping everything "above" the current level when stepping to another header.
|
||||
|
||||
#+begin_src rust
|
||||
let mut in_drawer: bool = false;
|
||||
let mut id_crumbs: Vec<Option<String>> = Vec::new();
|
||||
let mut cur_id: Option<String> = None;
|
||||
let mut cur_level: usize = 0;
|
||||
let mut headings: Vec<Heading> = Vec::new();
|
||||
headings.push(Heading::default());
|
||||
let mut links: HashMap<String, Vec<Link>> = HashMap::new();
|
||||
let mut inherited_tags: Vec<Vec<String>> = Vec::new();
|
||||
let mut in_drawer: bool = false;
|
||||
let mut id_crumbs: Vec<Option<String>> = Vec::new();
|
||||
let mut cur_id: Option<String> = None;
|
||||
let mut cur_level: usize = 0;
|
||||
let mut headings: Vec<Heading> = Vec::new();
|
||||
headings.push(Heading::default());
|
||||
let mut links: HashMap<String, Vec<Link>> = HashMap::new();
|
||||
let mut inherited_tags: Vec<Vec<String>> = Vec::new();
|
||||
#+END_SRC
|
||||
|
||||
=FILETAGS= parsing is a bit nasty to read, but basically the Keyword's value is a colon-separated list of strings; these are split, stripped of empty entries, collected, and stored in the root heading.
|
||||
|
||||
#+begin_src rust
|
||||
// file level metadata + filetags
|
||||
let file_metadata = extract_metadata(path.clone(), tree)?;
|
||||
let filetags = match file_metadata
|
||||
.iter()
|
||||
.find(|kw| kw.keyword.to_lowercase() == "filetags")
|
||||
{
|
||||
Some(kw) => kw
|
||||
.value
|
||||
.split(':')
|
||||
.map(|s| s.to_string())
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect(),
|
||||
_ => Vec::<String>::new(),
|
||||
};
|
||||
headings[0].tags = Some(filetags.clone());
|
||||
// file level metadata + filetags
|
||||
let file_metadata = extract_metadata(path.clone(), tree)?;
|
||||
let filetags = match file_metadata
|
||||
.iter()
|
||||
.find(|kw| kw.keyword.to_lowercase() == "filetags")
|
||||
{
|
||||
Some(kw) => kw
|
||||
.value
|
||||
.split(':')
|
||||
.map(|s| s.to_string())
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect(),
|
||||
_ => Vec::<String>::new(),
|
||||
};
|
||||
headings[0].tags = Some(filetags.clone());
|
||||
#+END_SRC
|
||||
|
||||
The root heading's title is the value of the document's =#+TITLE= keyword:
|
||||
|
||||
#+begin_src rust
|
||||
// Extract document title and apply to level 0 heading
|
||||
let doc_title = match file_metadata
|
||||
.iter()
|
||||
.find(|kw| kw.keyword.to_lowercase() == "title")
|
||||
{
|
||||
Some(kw) => kw.value.clone(),
|
||||
_ => String::from(""),
|
||||
};
|
||||
headings[0].text = doc_title;
|
||||
// Extract document title and apply to level 0 heading
|
||||
let doc_title = match file_metadata
|
||||
.iter()
|
||||
.find(|kw| kw.keyword.to_lowercase() == "title")
|
||||
{
|
||||
Some(kw) => kw.value.clone(),
|
||||
_ => String::from(""),
|
||||
};
|
||||
headings[0].text = doc_title;
|
||||
#+END_SRC
|
||||
|
||||
And now we step into the state machine. It iterates over each element, providing an =Event::Start= and =Event::End= for each element that the parser supports:
|
||||
|
||||
#+begin_src rust
|
||||
// state machine go brrr
|
||||
tree.iter()
|
||||
.map(|event| {
|
||||
match event {
|
||||
// state machine go brrr
|
||||
tree.iter()
|
||||
.map(|event| {
|
||||
match event {
|
||||
#+END_SRC
|
||||
|
||||
*** Heading parser
|
||||
|
@ -594,50 +604,55 @@ tree.iter()
|
|||
- Stash the heading in the return vector
|
||||
|
||||
#+begin_src rust
|
||||
Event::Start(orgize::Element::Title(title)) => {
|
||||
let properties = title.properties.clone().into_hash_map();
|
||||
cur_id = properties.get("ID").map(|id| id.clone().into());
|
||||
Event::Start(orgize::Element::Title(title)) => {
|
||||
let tmp_properties = title.properties.clone().into_hash_map();
|
||||
let mut export_properties: HashMap<String, String> = HashMap::new();
|
||||
tmp_properties.iter().for_each(|(k, v)| {
|
||||
export_properties.insert(k.to_string(), v.to_string());
|
||||
});
|
||||
cur_id = export_properties.get("ID").cloned();
|
||||
|
||||
id_crumbs.truncate(cur_level + 1);
|
||||
id_crumbs.push(cur_id.clone());
|
||||
id_crumbs.truncate(cur_level + 1);
|
||||
id_crumbs.push(cur_id.clone());
|
||||
|
||||
let refs = properties
|
||||
.get("ROAM_REFS")
|
||||
.map(|s| split_quoted_string(s.to_string()).ok())
|
||||
.unwrap_or(Some(vec![]));
|
||||
let aliases = properties
|
||||
.get("ROAM_ALIASES")
|
||||
.map(|s| split_quoted_string(s.to_string()).ok())
|
||||
.unwrap_or(Some(vec![]));
|
||||
cur_level = title.level;
|
||||
let refs = export_properties
|
||||
.get("ROAM_REFS")
|
||||
.map(|s| split_quoted_string(s.to_string()).ok())
|
||||
.unwrap_or(Some(vec![]));
|
||||
let aliases = export_properties
|
||||
.get("ROAM_ALIASES")
|
||||
.map(|s| split_quoted_string(s.to_string()).ok())
|
||||
.unwrap_or(Some(vec![]));
|
||||
cur_level = title.level;
|
||||
|
||||
// reset the tags table
|
||||
inherited_tags.truncate(cur_level - 1);
|
||||
let new_tags: Vec<String> = title
|
||||
.tags
|
||||
.iter()
|
||||
.map(|mbox| mbox.clone().to_string())
|
||||
.collect();
|
||||
inherited_tags.push(new_tags);
|
||||
// reset the tags table
|
||||
inherited_tags.truncate(cur_level - 1);
|
||||
let new_tags: Vec<String> = title
|
||||
.tags
|
||||
.iter()
|
||||
.map(|mbox| mbox.clone().to_string())
|
||||
.collect();
|
||||
inherited_tags.push(new_tags);
|
||||
|
||||
let most_tags = inherited_tags.concat();
|
||||
let all_tags: Vec<String> = [filetags.clone(), most_tags].concat();
|
||||
let most_tags = inherited_tags.concat();
|
||||
let all_tags: Vec<String> = [filetags.clone(), most_tags].concat();
|
||||
|
||||
let h = Heading {
|
||||
id: cur_id.clone(),
|
||||
level: cur_level,
|
||||
text: title.raw.to_string(),
|
||||
tags: match all_tags.len() {
|
||||
0 => None,
|
||||
_ => Some(all_tags),
|
||||
},
|
||||
refs,
|
||||
aliases,
|
||||
..Default::default()
|
||||
};
|
||||
headings.push(h);
|
||||
Ok(())
|
||||
}
|
||||
let h = Heading {
|
||||
id: cur_id.clone(),
|
||||
level: cur_level,
|
||||
text: title.raw.to_string(),
|
||||
tags: match all_tags.len() {
|
||||
0 => None,
|
||||
_ => Some(all_tags),
|
||||
},
|
||||
properties: export_properties,
|
||||
refs,
|
||||
aliases,
|
||||
..Default::default()
|
||||
};
|
||||
headings.push(h);
|
||||
Ok(())
|
||||
}
|
||||
#+END_SRC
|
||||
|
||||
**** NEXT I should be doing something like the =inherited_tags= stuff to track =cur_id= inheritance...
|
||||
|
@ -655,10 +670,10 @@ Handling the file-level properties drawer is a bit of a pain -- some day I'll ro
|
|||
When entering a drawer, the parser sets the =in_drawer= state variable; this is a bit boogy since in theory this could be a floating =PROPERTIES= drawer defined anywhere, but my org-mode docs are shaped reasonably enough that we'll cross that Rubicon when someone else uses this.
|
||||
|
||||
#+begin_src rust
|
||||
Event::Start(orgize::Element::Drawer(drawer)) => {
|
||||
in_drawer = drawer.name == "PROPERTIES" && headings[0].id.is_none();
|
||||
Ok(())
|
||||
}
|
||||
Event::Start(orgize::Element::Drawer(drawer)) => {
|
||||
in_drawer = drawer.name == "PROPERTIES" && headings[0].id.is_none();
|
||||
Ok(())
|
||||
}
|
||||
#+END_SRC
|
||||
|
||||
If the parser encounters a Text block while inside of a drawer, that text needs to be parsed as drawer contents, and then the =ID= (or, failing that, =CUSTOM_ID=), =ROAM_ALIASES=, and =ROAM_REFS= values are shoved into the root Heading.
|
||||
|
@ -666,55 +681,55 @@ If the parser encounters a Text block while inside of a drawer, that needs to be
|
|||
The drawer is assumed to be a key/value list as in the =PROPERTIES= drawers; this relies on my fork of =orgize=, which exposes =parse_drawer_content=. I *think* this should be able to use =prop_drawer.get= as in the code handling =Headings= above, and then these should be de-duplicated.
|
||||
|
||||
#+begin_src rust
|
||||
Event::Start(orgize::Element::Text { value }) => {
|
||||
if in_drawer {
|
||||
// this is where we rely on forked orgize
|
||||
let (_, prop_drawer): (_, orgize::elements::PropertiesMap) =
|
||||
orgize::elements::Drawer::parse_drawer_content(value)
|
||||
.expect("failed to parse properties drawer");
|
||||
let properties = prop_drawer.into_hash_map();
|
||||
Event::Start(orgize::Element::Text { value }) => {
|
||||
if in_drawer {
|
||||
// this is where we rely on forked orgize
|
||||
let (_, prop_drawer): (_, orgize::elements::PropertiesMap) =
|
||||
orgize::elements::Drawer::parse_drawer_content(value)
|
||||
.expect("failed to parse properties drawer");
|
||||
let properties = prop_drawer.into_hash_map();
|
||||
|
||||
// update cur_id and heading 0 ID since this is
|
||||
// implied to be the first drawer, but it's kind
|
||||
// of :yikes: to think about it like that! we
|
||||
// could be genious enough to have a floating
|
||||
// PROPERTIES drawer that would muck things up
|
||||
cur_id = properties.get("ID").map(|s| s.to_string());
|
||||
if cur_id.is_none() {
|
||||
cur_id = properties.get("CUSTOM_ID").map(|s| s.to_string())
|
||||
}
|
||||
// update cur_id and heading 0 ID since this is
|
||||
// implied to be the first drawer, but it's kind
|
||||
// of :yikes: to think about it like that! we
|
||||
// could be genious enough to have a floating
|
||||
// PROPERTIES drawer that would muck things up
|
||||
cur_id = properties.get("ID").map(|s| s.to_string());
|
||||
if cur_id.is_none() {
|
||||
cur_id = properties.get("CUSTOM_ID").map(|s| s.to_string())
|
||||
}
|
||||
|
||||
id_crumbs = vec![cur_id.clone()];
|
||||
headings[0].id = cur_id.clone();
|
||||
id_crumbs = vec![cur_id.clone()];
|
||||
headings[0].id = cur_id.clone();
|
||||
|
||||
headings[0].aliases = properties
|
||||
.get("ROAM_ALIASES")
|
||||
.map(|s| split_quoted_string(s.to_string()).ok())
|
||||
.unwrap_or(Some(vec![]));
|
||||
headings[0].refs = properties
|
||||
.get("ROAM_REFS")
|
||||
.map(|s| split_quoted_string(s.to_string()).ok())
|
||||
.unwrap_or(Some(vec![]));
|
||||
}
|
||||
headings[0].aliases = properties
|
||||
.get("ROAM_ALIASES")
|
||||
.map(|s| split_quoted_string(s.to_string()).ok())
|
||||
.unwrap_or(Some(vec![]));
|
||||
headings[0].refs = properties
|
||||
.get("ROAM_REFS")
|
||||
.map(|s| split_quoted_string(s.to_string()).ok())
|
||||
.unwrap_or(Some(vec![]));
|
||||
}
|
||||
|
||||
if headings[0].id.is_none() {
|
||||
return Err(InvalidDocError::new_err(format!(
|
||||
"Root ID is None in {}",
|
||||
path
|
||||
)));
|
||||
}
|
||||
if headings[0].id.is_none() {
|
||||
return Err(InvalidDocError::new_err(format!(
|
||||
"Root ID is None in {}",
|
||||
path
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
#+END_SRC
|
||||
|
||||
When we exit the Drawer, the state value is cleared.
|
||||
|
||||
#+begin_src rust
|
||||
Event::End(orgize::Element::Drawer(_drawer)) => {
|
||||
in_drawer = false;
|
||||
Ok(())
|
||||
}
|
||||
Event::End(orgize::Element::Drawer(_drawer)) => {
|
||||
in_drawer = false;
|
||||
Ok(())
|
||||
}
|
||||
#+END_SRC
|
||||
|
||||
**** NEXT fix orgize to expose file-level propertiesmap
|
||||
|
@ -726,40 +741,40 @@ Look; I'm gonna be honest here. I don't remember why the links are stored outsid
|
|||
(maybe because they may have None IDs in the from_id?)
|
||||
|
||||
#+begin_src rust
|
||||
// Stash links outside the match block in a HashMap shape
|
||||
// of heading id -> list of links; it would be nice if the
|
||||
// match block returned an Option<Link> but that doesn't
|
||||
// play well with the rest of the state machine
|
||||
Event::Start(orgize::Element::Link(link)) => {
|
||||
let dest = link.path.to_string();
|
||||
let (proto, stripped_dest): (Option<String>, String) =
|
||||
match dest.split_once(':') {
|
||||
Some((proto, stripped_dest)) => {
|
||||
(Some(proto.to_string()), stripped_dest.to_string())
|
||||
}
|
||||
None => (None, dest.clone()),
|
||||
};
|
||||
// Stash links outside the match block in a HashMap shape
|
||||
// of heading id -> list of links; it would be nice if the
|
||||
// match block returned an Option<Link> but that doesn't
|
||||
// play well with the rest of the state machine
|
||||
Event::Start(orgize::Element::Link(link)) => {
|
||||
let dest = link.path.to_string();
|
||||
let (proto, stripped_dest): (Option<String>, String) =
|
||||
match dest.split_once(':') {
|
||||
Some((proto, stripped_dest)) => {
|
||||
(Some(proto.to_string()), stripped_dest.to_string())
|
||||
}
|
||||
None => (None, dest.clone()),
|
||||
};
|
||||
|
||||
let last_non_none = match id_crumbs.iter().rev().find_map(|x| x.clone()) {
|
||||
Some(last_non_none) => last_non_none,
|
||||
None => {
|
||||
return Err(InvalidDocError::new_err(format!(
|
||||
"no non-none ID in {}",
|
||||
path
|
||||
)));
|
||||
}
|
||||
};
|
||||
let last_non_none = match id_crumbs.iter().rev().find_map(|x| x.clone()) {
|
||||
Some(last_non_none) => last_non_none,
|
||||
None => {
|
||||
return Err(InvalidDocError::new_err(format!(
|
||||
"no non-none ID in {}",
|
||||
path
|
||||
)));
|
||||
}
|
||||
};
|
||||
|
||||
let link_list = links.entry(last_non_none.clone()).or_insert(Vec::new());
|
||||
link_list.push(Link {
|
||||
from_file: path.clone().to_string(),
|
||||
from_id: last_non_none.clone(),
|
||||
to: stripped_dest.clone(),
|
||||
to_proto: proto.clone(),
|
||||
text: link.desc.clone().map(String::from),
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
let link_list = links.entry(last_non_none.clone()).or_insert(Vec::new());
|
||||
link_list.push(Link {
|
||||
from_file: path.clone().to_string(),
|
||||
from_id: last_non_none.clone(),
|
||||
to: stripped_dest.clone(),
|
||||
to_proto: proto.clone(),
|
||||
text: link.desc.clone().map(String::from),
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
#+END_SRC
|
||||
|
||||
*** NEXT Attachment and image caching
|
||||
|
@ -1020,8 +1035,8 @@ impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for ArroyoHtmlHandler<E,
|
|||
self.heading_breadcrumbs.push(id.to_string());
|
||||
}
|
||||
}
|
||||
dbg!(&self.heading_breadcrumbs);
|
||||
dbg!(&self.options.limit_headings);
|
||||
// dbg!(&self.heading_breadcrumbs);
|
||||
// dbg!(&self.options.limit_headings);
|
||||
|
||||
let breadcrumb_set =
|
||||
HashSet::from_iter(self.heading_breadcrumbs.clone().into_iter());
|
||||
|
@ -1032,7 +1047,7 @@ impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for ArroyoHtmlHandler<E,
|
|||
.count()
|
||||
!= 0;
|
||||
|
||||
dbg!(self.in_public_heading);
|
||||
// dbg!(self.in_public_heading);
|
||||
if self.in_public_heading {
|
||||
self.inner.start(w, &element)?
|
||||
}
|
||||
|
|
|
@ -152,8 +152,8 @@ impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for ArroyoHtmlHandler<E,
|
|||
self.heading_breadcrumbs.push(id.to_string());
|
||||
}
|
||||
}
|
||||
dbg!(&self.heading_breadcrumbs);
|
||||
dbg!(&self.options.limit_headings);
|
||||
// dbg!(&self.heading_breadcrumbs);
|
||||
// dbg!(&self.options.limit_headings);
|
||||
|
||||
let breadcrumb_set =
|
||||
HashSet::from_iter(self.heading_breadcrumbs.clone().into_iter());
|
||||
|
@ -164,7 +164,7 @@ impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for ArroyoHtmlHandler<E,
|
|||
.count()
|
||||
!= 0;
|
||||
|
||||
dbg!(self.in_public_heading);
|
||||
// dbg!(self.in_public_heading);
|
||||
if self.in_public_heading {
|
||||
self.inner.start(w, &element)?
|
||||
}
|
||||
|
|
317
src/parse.rs
317
src/parse.rs
|
@ -19,10 +19,16 @@ pub fn parse_document(path: String) -> Result<Document> {
|
|||
&org,
|
||||
&orgize::ParseConfig {
|
||||
// Need to pull these from environment or options...
|
||||
todo_keywords: (vec!["NEXT".to_string(), "INPROGRESS".to_string(), "WAITING".to_string()],
|
||||
vec!["DONE".to_string(), "CANCELLED".to_string()]),
|
||||
todo_keywords: (
|
||||
vec![
|
||||
"NEXT".to_string(),
|
||||
"INPROGRESS".to_string(),
|
||||
"WAITING".to_string(),
|
||||
],
|
||||
vec!["DONE".to_string(), "CANCELLED".to_string()],
|
||||
),
|
||||
..Default::default()
|
||||
}
|
||||
},
|
||||
);
|
||||
let keywords = extract_metadata(path.clone(), org_tree)?;
|
||||
let headings = extract_headings(path.clone(), org_tree)?;
|
||||
|
@ -53,192 +59,197 @@ pub fn extract_headings(path: String, tree: &Org) -> Result<Vec<Heading>> {
|
|||
// Extracting Arroyo Headings:1 ends here
|
||||
|
||||
// [[file:../arroyo-native-parser.org::*Extracting Arroyo Headings][Extracting Arroyo Headings:2]]
|
||||
let mut in_drawer: bool = false;
|
||||
let mut id_crumbs: Vec<Option<String>> = Vec::new();
|
||||
let mut cur_id: Option<String> = None;
|
||||
let mut cur_level: usize = 0;
|
||||
let mut headings: Vec<Heading> = Vec::new();
|
||||
headings.push(Heading::default());
|
||||
let mut links: HashMap<String, Vec<Link>> = HashMap::new();
|
||||
let mut inherited_tags: Vec<Vec<String>> = Vec::new();
|
||||
let mut in_drawer: bool = false;
|
||||
let mut id_crumbs: Vec<Option<String>> = Vec::new();
|
||||
let mut cur_id: Option<String> = None;
|
||||
let mut cur_level: usize = 0;
|
||||
let mut headings: Vec<Heading> = Vec::new();
|
||||
headings.push(Heading::default());
|
||||
let mut links: HashMap<String, Vec<Link>> = HashMap::new();
|
||||
let mut inherited_tags: Vec<Vec<String>> = Vec::new();
|
||||
// Extracting Arroyo Headings:2 ends here
|
||||
|
||||
// [[file:../arroyo-native-parser.org::*Extracting Arroyo Headings][Extracting Arroyo Headings:3]]
|
||||
// file level metadata + filetags
|
||||
let file_metadata = extract_metadata(path.clone(), tree)?;
|
||||
let filetags = match file_metadata
|
||||
.iter()
|
||||
.find(|kw| kw.keyword.to_lowercase() == "filetags")
|
||||
{
|
||||
Some(kw) => kw
|
||||
.value
|
||||
.split(':')
|
||||
.map(|s| s.to_string())
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect(),
|
||||
_ => Vec::<String>::new(),
|
||||
};
|
||||
headings[0].tags = Some(filetags.clone());
|
||||
// file level metadata + filetags
|
||||
let file_metadata = extract_metadata(path.clone(), tree)?;
|
||||
let filetags = match file_metadata
|
||||
.iter()
|
||||
.find(|kw| kw.keyword.to_lowercase() == "filetags")
|
||||
{
|
||||
Some(kw) => kw
|
||||
.value
|
||||
.split(':')
|
||||
.map(|s| s.to_string())
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect(),
|
||||
_ => Vec::<String>::new(),
|
||||
};
|
||||
headings[0].tags = Some(filetags.clone());
|
||||
// Extracting Arroyo Headings:3 ends here
|
||||
|
||||
// [[file:../arroyo-native-parser.org::*Extracting Arroyo Headings][Extracting Arroyo Headings:4]]
|
||||
// Extract document title and apply to level 0 heading
|
||||
let doc_title = match file_metadata
|
||||
.iter()
|
||||
.find(|kw| kw.keyword.to_lowercase() == "title")
|
||||
{
|
||||
Some(kw) => kw.value.clone(),
|
||||
_ => String::from(""),
|
||||
};
|
||||
headings[0].text = doc_title;
|
||||
// Extract document title and apply to level 0 heading
|
||||
let doc_title = match file_metadata
|
||||
.iter()
|
||||
.find(|kw| kw.keyword.to_lowercase() == "title")
|
||||
{
|
||||
Some(kw) => kw.value.clone(),
|
||||
_ => String::from(""),
|
||||
};
|
||||
headings[0].text = doc_title;
|
||||
// Extracting Arroyo Headings:4 ends here
|
||||
|
||||
// [[file:../arroyo-native-parser.org::*Extracting Arroyo Headings][Extracting Arroyo Headings:5]]
|
||||
// state machine go brrr
|
||||
tree.iter()
|
||||
.map(|event| {
|
||||
match event {
|
||||
// state machine go brrr
|
||||
tree.iter()
|
||||
.map(|event| {
|
||||
match event {
|
||||
// Extracting Arroyo Headings:5 ends here
|
||||
|
||||
// [[file:../arroyo-native-parser.org::*Heading parser][Heading parser:1]]
|
||||
Event::Start(orgize::Element::Title(title)) => {
|
||||
let properties = title.properties.clone().into_hash_map();
|
||||
cur_id = properties.get("ID").map(|id| id.clone().into());
|
||||
Event::Start(orgize::Element::Title(title)) => {
|
||||
let tmp_properties = title.properties.clone().into_hash_map();
|
||||
let mut export_properties: HashMap<String, String> = HashMap::new();
|
||||
tmp_properties.iter().for_each(|(k, v)| {
|
||||
export_properties.insert(k.to_string(), v.to_string());
|
||||
});
|
||||
cur_id = export_properties.get("ID").cloned();
|
||||
|
||||
id_crumbs.truncate(cur_level + 1);
|
||||
id_crumbs.push(cur_id.clone());
|
||||
id_crumbs.truncate(cur_level + 1);
|
||||
id_crumbs.push(cur_id.clone());
|
||||
|
||||
let refs = properties
|
||||
.get("ROAM_REFS")
|
||||
.map(|s| split_quoted_string(s.to_string()).ok())
|
||||
.unwrap_or(Some(vec![]));
|
||||
let aliases = properties
|
||||
.get("ROAM_ALIASES")
|
||||
.map(|s| split_quoted_string(s.to_string()).ok())
|
||||
.unwrap_or(Some(vec![]));
|
||||
cur_level = title.level;
|
||||
let refs = export_properties
|
||||
.get("ROAM_REFS")
|
||||
.map(|s| split_quoted_string(s.to_string()).ok())
|
||||
.unwrap_or(Some(vec![]));
|
||||
let aliases = export_properties
|
||||
.get("ROAM_ALIASES")
|
||||
.map(|s| split_quoted_string(s.to_string()).ok())
|
||||
.unwrap_or(Some(vec![]));
|
||||
cur_level = title.level;
|
||||
|
||||
// reset the tags table
|
||||
inherited_tags.truncate(cur_level - 1);
|
||||
let new_tags: Vec<String> = title
|
||||
.tags
|
||||
.iter()
|
||||
.map(|mbox| mbox.clone().to_string())
|
||||
.collect();
|
||||
inherited_tags.push(new_tags);
|
||||
// reset the tags table
|
||||
inherited_tags.truncate(cur_level - 1);
|
||||
let new_tags: Vec<String> = title
|
||||
.tags
|
||||
.iter()
|
||||
.map(|mbox| mbox.clone().to_string())
|
||||
.collect();
|
||||
inherited_tags.push(new_tags);
|
||||
|
||||
let most_tags = inherited_tags.concat();
|
||||
let all_tags: Vec<String> = [filetags.clone(), most_tags].concat();
|
||||
let most_tags = inherited_tags.concat();
|
||||
let all_tags: Vec<String> = [filetags.clone(), most_tags].concat();
|
||||
|
||||
let h = Heading {
|
||||
id: cur_id.clone(),
|
||||
level: cur_level,
|
||||
text: title.raw.to_string(),
|
||||
tags: match all_tags.len() {
|
||||
0 => None,
|
||||
_ => Some(all_tags),
|
||||
},
|
||||
refs,
|
||||
aliases,
|
||||
..Default::default()
|
||||
};
|
||||
headings.push(h);
|
||||
Ok(())
|
||||
}
|
||||
let h = Heading {
|
||||
id: cur_id.clone(),
|
||||
level: cur_level,
|
||||
text: title.raw.to_string(),
|
||||
tags: match all_tags.len() {
|
||||
0 => None,
|
||||
_ => Some(all_tags),
|
||||
},
|
||||
properties: export_properties,
|
||||
refs,
|
||||
aliases,
|
||||
..Default::default()
|
||||
};
|
||||
headings.push(h);
|
||||
Ok(())
|
||||
}
|
||||
// Heading parser:1 ends here
|
||||
|
||||
// [[file:../arroyo-native-parser.org::*File-level Property Drawer parsing][File-level Property Drawer parsing:1]]
|
||||
Event::Start(orgize::Element::Drawer(drawer)) => {
|
||||
in_drawer = drawer.name == "PROPERTIES" && headings[0].id.is_none();
|
||||
Ok(())
|
||||
}
|
||||
Event::Start(orgize::Element::Drawer(drawer)) => {
|
||||
in_drawer = drawer.name == "PROPERTIES" && headings[0].id.is_none();
|
||||
Ok(())
|
||||
}
|
||||
// File-level Property Drawer parsing:1 ends here
|
||||
|
||||
// [[file:../arroyo-native-parser.org::*File-level Property Drawer parsing][File-level Property Drawer parsing:2]]
|
||||
Event::Start(orgize::Element::Text { value }) => {
|
||||
if in_drawer {
|
||||
// this is where we rely on forked orgize
|
||||
let (_, prop_drawer): (_, orgize::elements::PropertiesMap) =
|
||||
orgize::elements::Drawer::parse_drawer_content(value)
|
||||
.expect("failed to parse properties drawer");
|
||||
let properties = prop_drawer.into_hash_map();
|
||||
Event::Start(orgize::Element::Text { value }) => {
|
||||
if in_drawer {
|
||||
// this is where we rely on forked orgize
|
||||
let (_, prop_drawer): (_, orgize::elements::PropertiesMap) =
|
||||
orgize::elements::Drawer::parse_drawer_content(value)
|
||||
.expect("failed to parse properties drawer");
|
||||
let properties = prop_drawer.into_hash_map();
|
||||
|
||||
// update cur_id and heading 0 ID since this is
|
||||
// implied to be the first drawer, but it's kind
|
||||
// of :yikes: to think about it like that! we
|
||||
// could be genious enough to have a floating
|
||||
// PROPERTIES drawer that would muck things up
|
||||
cur_id = properties.get("ID").map(|s| s.to_string());
|
||||
if cur_id.is_none() {
|
||||
cur_id = properties.get("CUSTOM_ID").map(|s| s.to_string())
|
||||
}
|
||||
// update cur_id and heading 0 ID since this is
|
||||
// implied to be the first drawer, but it's kind
|
||||
// of :yikes: to think about it like that! we
|
||||
// could be genious enough to have a floating
|
||||
// PROPERTIES drawer that would muck things up
|
||||
cur_id = properties.get("ID").map(|s| s.to_string());
|
||||
if cur_id.is_none() {
|
||||
cur_id = properties.get("CUSTOM_ID").map(|s| s.to_string())
|
||||
}
|
||||
|
||||
id_crumbs = vec![cur_id.clone()];
|
||||
headings[0].id = cur_id.clone();
|
||||
id_crumbs = vec![cur_id.clone()];
|
||||
headings[0].id = cur_id.clone();
|
||||
|
||||
headings[0].aliases = properties
|
||||
.get("ROAM_ALIASES")
|
||||
.map(|s| split_quoted_string(s.to_string()).ok())
|
||||
.unwrap_or(Some(vec![]));
|
||||
headings[0].refs = properties
|
||||
.get("ROAM_REFS")
|
||||
.map(|s| split_quoted_string(s.to_string()).ok())
|
||||
.unwrap_or(Some(vec![]));
|
||||
}
|
||||
headings[0].aliases = properties
|
||||
.get("ROAM_ALIASES")
|
||||
.map(|s| split_quoted_string(s.to_string()).ok())
|
||||
.unwrap_or(Some(vec![]));
|
||||
headings[0].refs = properties
|
||||
.get("ROAM_REFS")
|
||||
.map(|s| split_quoted_string(s.to_string()).ok())
|
||||
.unwrap_or(Some(vec![]));
|
||||
}
|
||||
|
||||
if headings[0].id.is_none() {
|
||||
return Err(InvalidDocError::new_err(format!(
|
||||
"Root ID is None in {}",
|
||||
path
|
||||
)));
|
||||
}
|
||||
if headings[0].id.is_none() {
|
||||
return Err(InvalidDocError::new_err(format!(
|
||||
"Root ID is None in {}",
|
||||
path
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
// File-level Property Drawer parsing:2 ends here
|
||||
|
||||
// [[file:../arroyo-native-parser.org::*File-level Property Drawer parsing][File-level Property Drawer parsing:3]]
|
||||
Event::End(orgize::Element::Drawer(_drawer)) => {
|
||||
in_drawer = false;
|
||||
Ok(())
|
||||
}
|
||||
Event::End(orgize::Element::Drawer(_drawer)) => {
|
||||
in_drawer = false;
|
||||
Ok(())
|
||||
}
|
||||
// File-level Property Drawer parsing:3 ends here
|
||||
|
||||
// [[file:../arroyo-native-parser.org::*Link parsing][Link parsing:1]]
|
||||
// Stash links outside the match block in a HashMap shape
|
||||
// of heading id -> list of links; it would be nice if the
|
||||
// match block returned an Option<Link> but that doesn't
|
||||
// play well with the rest of the state machine
|
||||
Event::Start(orgize::Element::Link(link)) => {
|
||||
let dest = link.path.to_string();
|
||||
let (proto, stripped_dest): (Option<String>, String) =
|
||||
match dest.split_once(':') {
|
||||
Some((proto, stripped_dest)) => {
|
||||
(Some(proto.to_string()), stripped_dest.to_string())
|
||||
}
|
||||
None => (None, dest.clone()),
|
||||
};
|
||||
// Stash links outside the match block in a HashMap shape
|
||||
// of heading id -> list of links; it would be nice if the
|
||||
// match block returned an Option<Link> but that doesn't
|
||||
// play well with the rest of the state machine
|
||||
Event::Start(orgize::Element::Link(link)) => {
|
||||
let dest = link.path.to_string();
|
||||
let (proto, stripped_dest): (Option<String>, String) =
|
||||
match dest.split_once(':') {
|
||||
Some((proto, stripped_dest)) => {
|
||||
(Some(proto.to_string()), stripped_dest.to_string())
|
||||
}
|
||||
None => (None, dest.clone()),
|
||||
};
|
||||
|
||||
let last_non_none = match id_crumbs.iter().rev().find_map(|x| x.clone()) {
|
||||
Some(last_non_none) => last_non_none,
|
||||
None => {
|
||||
return Err(InvalidDocError::new_err(format!(
|
||||
"no non-none ID in {}",
|
||||
path
|
||||
)));
|
||||
}
|
||||
};
|
||||
let last_non_none = match id_crumbs.iter().rev().find_map(|x| x.clone()) {
|
||||
Some(last_non_none) => last_non_none,
|
||||
None => {
|
||||
return Err(InvalidDocError::new_err(format!(
|
||||
"no non-none ID in {}",
|
||||
path
|
||||
)));
|
||||
}
|
||||
};
|
||||
|
||||
let link_list = links.entry(last_non_none.clone()).or_insert(Vec::new());
|
||||
link_list.push(Link {
|
||||
from_file: path.clone().to_string(),
|
||||
from_id: last_non_none.clone(),
|
||||
to: stripped_dest.clone(),
|
||||
to_proto: proto.clone(),
|
||||
text: link.desc.clone().map(String::from),
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
let link_list = links.entry(last_non_none.clone()).or_insert(Vec::new());
|
||||
link_list.push(Link {
|
||||
from_file: path.clone().to_string(),
|
||||
from_id: last_non_none.clone(),
|
||||
to: stripped_dest.clone(),
|
||||
to_proto: proto.clone(),
|
||||
text: link.desc.clone().map(String::from),
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
// Link parsing:1 ends here
|
||||
|
||||
// [[file:../arroyo-native-parser.org::*Cleaning up][Cleaning up:1]]
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
use pyo3::exceptions::PyException;
|
||||
use pyo3::prelude::*;
|
||||
use pyo3::pyclass;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use std::fmt;
|
||||
|
||||
|
@ -106,6 +107,8 @@ pub struct Heading {
|
|||
#[pyo3(get)]
|
||||
pub text: String,
|
||||
#[pyo3(get)]
|
||||
pub properties: HashMap<String, String>,
|
||||
#[pyo3(get)]
|
||||
pub tags: Option<Vec<String>>,
|
||||
#[pyo3(get)]
|
||||
pub refs: Option<Vec<String>>,
|
||||
|
@ -120,13 +123,14 @@ impl fmt::Display for Heading {
|
|||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"Heading(id={}, title={}, {} tags, {} refs, {} aliases, {} links)",
|
||||
"Heading(id={}, title={}, {} tags, {} refs, {} aliases, {} links, props {:?})",
|
||||
self.id.clone().unwrap_or("None".to_owned()),
|
||||
self.text,
|
||||
self.tags.clone().unwrap_or(vec![]).len(),
|
||||
self.refs.clone().unwrap_or(vec![]).len(),
|
||||
self.aliases.clone().unwrap_or(vec![]).len(),
|
||||
self.links.clone().unwrap_or(vec![]).len(),
|
||||
self.properties.clone(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue