Compare commits

...

2 Commits

Author SHA1 Message Date
Ryan Rix 52258e2db7 remove some dbgs i put in place when writing this 2024-02-04 18:53:34 -08:00
Ryan Rix be91137ba8 export PropertiesMap in the "public" entities 2024-02-04 18:53:16 -08:00
4 changed files with 344 additions and 314 deletions

View File

@ -229,6 +229,7 @@ Let's start by defining the types. We use =pyo3= macro annotations so that these
use pyo3::exceptions::PyException;
use pyo3::prelude::*;
use pyo3::pyclass;
use std::collections::HashMap;
use std::fmt;
@ -360,6 +361,8 @@ pub struct Heading {
#[pyo3(get)]
pub text: String,
#[pyo3(get)]
pub properties: HashMap<String, String>,
#[pyo3(get)]
pub tags: Option<Vec<String>>,
#[pyo3(get)]
pub refs: Option<Vec<String>>,
@ -374,13 +377,14 @@ impl fmt::Display for Heading {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"Heading(id={}, title={}, {} tags, {} refs, {} aliases, {} links)",
"Heading(id={}, title={}, {} tags, {} refs, {} aliases, {} links, props {:?})",
self.id.clone().unwrap_or("None".to_owned()),
self.text,
self.tags.clone().unwrap_or(vec![]).len(),
self.refs.clone().unwrap_or(vec![]).len(),
self.aliases.clone().unwrap_or(vec![]).len(),
self.links.clone().unwrap_or(vec![]).len(),
self.properties.clone(),
)
}
}
@ -475,10 +479,16 @@ pub fn parse_document(path: String) -> Result<Document> {
&org,
&orgize::ParseConfig {
// Need to pull these from environment or options...
todo_keywords: (vec!["NEXT".to_string(), "INPROGRESS".to_string(), "WAITING".to_string()],
vec!["DONE".to_string(), "CANCELLED".to_string()]),
todo_keywords: (
vec![
"NEXT".to_string(),
"INPROGRESS".to_string(),
"WAITING".to_string(),
],
vec!["DONE".to_string(), "CANCELLED".to_string()],
),
..Default::default()
}
},
);
let keywords = extract_metadata(path.clone(), org_tree)?;
let headings = extract_headings(path.clone(), org_tree)?;
@ -532,57 +542,57 @@ There are some mutable variables at the top of this function which are used for
- =inherited_tags= is a list of lists of strings; the inner vector contains the list of tags for each header, starting at level 0 for =FILETAGS= entries. Combining this structure and =cur_level= allows the parser to perform tag inheritance by flattening the list, and by dropping everything "above" the current level when stepping to another header.
#+begin_src rust
let mut in_drawer: bool = false;
let mut id_crumbs: Vec<Option<String>> = Vec::new();
let mut cur_id: Option<String> = None;
let mut cur_level: usize = 0;
let mut headings: Vec<Heading> = Vec::new();
headings.push(Heading::default());
let mut links: HashMap<String, Vec<Link>> = HashMap::new();
let mut inherited_tags: Vec<Vec<String>> = Vec::new();
let mut in_drawer: bool = false;
let mut id_crumbs: Vec<Option<String>> = Vec::new();
let mut cur_id: Option<String> = None;
let mut cur_level: usize = 0;
let mut headings: Vec<Heading> = Vec::new();
headings.push(Heading::default());
let mut links: HashMap<String, Vec<Link>> = HashMap::new();
let mut inherited_tags: Vec<Vec<String>> = Vec::new();
#+END_SRC
=FILETAGS= parsing is a bit nasty to read, but basically the Keyword's value is a colon-separated list of strings; these are split, collected, and stored as the root heading's tags.
#+begin_src rust
// file level metadata + filetags
let file_metadata = extract_metadata(path.clone(), tree)?;
let filetags = match file_metadata
.iter()
.find(|kw| kw.keyword.to_lowercase() == "filetags")
{
Some(kw) => kw
.value
.split(':')
.map(|s| s.to_string())
.filter(|s| !s.is_empty())
.collect(),
_ => Vec::<String>::new(),
};
headings[0].tags = Some(filetags.clone());
// file level metadata + filetags
let file_metadata = extract_metadata(path.clone(), tree)?;
let filetags = match file_metadata
.iter()
.find(|kw| kw.keyword.to_lowercase() == "filetags")
{
Some(kw) => kw
.value
.split(':')
.map(|s| s.to_string())
.filter(|s| !s.is_empty())
.collect(),
_ => Vec::<String>::new(),
};
headings[0].tags = Some(filetags.clone());
#+END_SRC
The root heading's title is the value of the document's =#+TITLE= keyword:
#+begin_src rust
// Extract document title and apply to level 0 heading
let doc_title = match file_metadata
.iter()
.find(|kw| kw.keyword.to_lowercase() == "title")
{
Some(kw) => kw.value.clone(),
_ => String::from(""),
};
headings[0].text = doc_title;
// Extract document title and apply to level 0 heading
let doc_title = match file_metadata
.iter()
.find(|kw| kw.keyword.to_lowercase() == "title")
{
Some(kw) => kw.value.clone(),
_ => String::from(""),
};
headings[0].text = doc_title;
#+END_SRC
And now we step into the state machine. It iterates over each element, providing an =Event::Start= and =Event::End= for each element that the parser supports:
#+begin_src rust
// state machine go brrr
tree.iter()
.map(|event| {
match event {
// state machine go brrr
tree.iter()
.map(|event| {
match event {
#+END_SRC
*** Heading parser
@ -594,50 +604,55 @@ tree.iter()
- Stash the heading in the return vector
#+begin_src rust
Event::Start(orgize::Element::Title(title)) => {
let properties = title.properties.clone().into_hash_map();
cur_id = properties.get("ID").map(|id| id.clone().into());
Event::Start(orgize::Element::Title(title)) => {
let tmp_properties = title.properties.clone().into_hash_map();
let mut export_properties: HashMap<String, String> = HashMap::new();
tmp_properties.iter().for_each(|(k, v)| {
export_properties.insert(k.to_string(), v.to_string());
});
cur_id = export_properties.get("ID").cloned();
id_crumbs.truncate(cur_level + 1);
id_crumbs.push(cur_id.clone());
id_crumbs.truncate(cur_level + 1);
id_crumbs.push(cur_id.clone());
let refs = properties
.get("ROAM_REFS")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
let aliases = properties
.get("ROAM_ALIASES")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
cur_level = title.level;
let refs = export_properties
.get("ROAM_REFS")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
let aliases = export_properties
.get("ROAM_ALIASES")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
cur_level = title.level;
// reset the tags table
inherited_tags.truncate(cur_level - 1);
let new_tags: Vec<String> = title
.tags
.iter()
.map(|mbox| mbox.clone().to_string())
.collect();
inherited_tags.push(new_tags);
// reset the tags table
inherited_tags.truncate(cur_level - 1);
let new_tags: Vec<String> = title
.tags
.iter()
.map(|mbox| mbox.clone().to_string())
.collect();
inherited_tags.push(new_tags);
let most_tags = inherited_tags.concat();
let all_tags: Vec<String> = [filetags.clone(), most_tags].concat();
let most_tags = inherited_tags.concat();
let all_tags: Vec<String> = [filetags.clone(), most_tags].concat();
let h = Heading {
id: cur_id.clone(),
level: cur_level,
text: title.raw.to_string(),
tags: match all_tags.len() {
0 => None,
_ => Some(all_tags),
},
refs,
aliases,
..Default::default()
};
headings.push(h);
Ok(())
}
let h = Heading {
id: cur_id.clone(),
level: cur_level,
text: title.raw.to_string(),
tags: match all_tags.len() {
0 => None,
_ => Some(all_tags),
},
properties: export_properties,
refs,
aliases,
..Default::default()
};
headings.push(h);
Ok(())
}
#+END_SRC
**** NEXT I should be doing something like the =inherited_tags= stuff to track =cur_id= inheritance...
@ -655,10 +670,10 @@ Handling the file-level properties drawer is a bit of a pain -- some day I'll ro
When entering a drawer, the parser sets the =in_drawer= state variable. This is a bit buggy, since in theory this could be a floating =PROPERTIES= drawer defined anywhere, but my org-mode docs are shaped reasonably enough that we'll cross that Rubicon when someone else uses this.
#+begin_src rust
Event::Start(orgize::Element::Drawer(drawer)) => {
in_drawer = drawer.name == "PROPERTIES" && headings[0].id.is_none();
Ok(())
}
Event::Start(orgize::Element::Drawer(drawer)) => {
in_drawer = drawer.name == "PROPERTIES" && headings[0].id.is_none();
Ok(())
}
#+END_SRC
If the parser encounters a Text block while inside of a drawer, that text needs to be parsed, and then the keys and whatnot are shoved into the root Heading.
@ -666,55 +681,55 @@ If the parser encounters a Text block while inside of a drawer, that needs to be
The drawer is assumed to be a key/value list as in the =PROPERTIES= drawers; this relies on my fork of =orgize= which exposes =parse_drawer_contents=. I *think* this should be able to use =prop_drawer.get= as in the code handling =Headings= above, and then these should be de-duplicated.
#+begin_src rust
Event::Start(orgize::Element::Text { value }) => {
if in_drawer {
// this is where we rely on forked orgize
let (_, prop_drawer): (_, orgize::elements::PropertiesMap) =
orgize::elements::Drawer::parse_drawer_content(value)
.expect("failed to parse properties drawer");
let properties = prop_drawer.into_hash_map();
Event::Start(orgize::Element::Text { value }) => {
if in_drawer {
// this is where we rely on forked orgize
let (_, prop_drawer): (_, orgize::elements::PropertiesMap) =
orgize::elements::Drawer::parse_drawer_content(value)
.expect("failed to parse properties drawer");
let properties = prop_drawer.into_hash_map();
// update cur_id and heading 0 ID since this is
// implied to be the first drawer, but it's kind
// of :yikes: to think about it like that! we
// could be genious enough to have a floating
// PROPERTIES drawer that would muck things up
cur_id = properties.get("ID").map(|s| s.to_string());
if cur_id.is_none() {
cur_id = properties.get("CUSTOM_ID").map(|s| s.to_string())
}
// update cur_id and heading 0 ID since this is
// implied to be the first drawer, but it's kind
// of :yikes: to think about it like that! we
// could be genious enough to have a floating
// PROPERTIES drawer that would muck things up
cur_id = properties.get("ID").map(|s| s.to_string());
if cur_id.is_none() {
cur_id = properties.get("CUSTOM_ID").map(|s| s.to_string())
}
id_crumbs = vec![cur_id.clone()];
headings[0].id = cur_id.clone();
id_crumbs = vec![cur_id.clone()];
headings[0].id = cur_id.clone();
headings[0].aliases = properties
.get("ROAM_ALIASES")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
headings[0].refs = properties
.get("ROAM_REFS")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
}
headings[0].aliases = properties
.get("ROAM_ALIASES")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
headings[0].refs = properties
.get("ROAM_REFS")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
}
if headings[0].id.is_none() {
return Err(InvalidDocError::new_err(format!(
"Root ID is None in {}",
path
)));
}
if headings[0].id.is_none() {
return Err(InvalidDocError::new_err(format!(
"Root ID is None in {}",
path
)));
}
Ok(())
}
Ok(())
}
#+END_SRC
When we exit the Drawer, the state value is cleared.
#+begin_src rust
Event::End(orgize::Element::Drawer(_drawer)) => {
in_drawer = false;
Ok(())
}
Event::End(orgize::Element::Drawer(_drawer)) => {
in_drawer = false;
Ok(())
}
#+END_SRC
**** NEXT fix orgize to expose file-level propertiesmap
@ -726,40 +741,40 @@ Look; I'm gonna be honest here. I don't remember why the links are stored outsid
(maybe because they may have None IDs in the from_id?)
#+begin_src rust
// Stash links outside the match block in a HashMap shape
// of heading id -> list of links; it would be nice if the
// match block returned an Option<Link> but that doesn't
// play well with the rest of the state machine
Event::Start(orgize::Element::Link(link)) => {
let dest = link.path.to_string();
let (proto, stripped_dest): (Option<String>, String) =
match dest.split_once(':') {
Some((proto, stripped_dest)) => {
(Some(proto.to_string()), stripped_dest.to_string())
}
None => (None, dest.clone()),
};
// Stash links outside the match block in a HashMap shape
// of heading id -> list of links; it would be nice if the
// match block returned an Option<Link> but that doesn't
// play well with the rest of the state machine
Event::Start(orgize::Element::Link(link)) => {
let dest = link.path.to_string();
let (proto, stripped_dest): (Option<String>, String) =
match dest.split_once(':') {
Some((proto, stripped_dest)) => {
(Some(proto.to_string()), stripped_dest.to_string())
}
None => (None, dest.clone()),
};
let last_non_none = match id_crumbs.iter().rev().find_map(|x| x.clone()) {
Some(last_non_none) => last_non_none,
None => {
return Err(InvalidDocError::new_err(format!(
"no non-none ID in {}",
path
)));
}
};
let last_non_none = match id_crumbs.iter().rev().find_map(|x| x.clone()) {
Some(last_non_none) => last_non_none,
None => {
return Err(InvalidDocError::new_err(format!(
"no non-none ID in {}",
path
)));
}
};
let link_list = links.entry(last_non_none.clone()).or_insert(Vec::new());
link_list.push(Link {
from_file: path.clone().to_string(),
from_id: last_non_none.clone(),
to: stripped_dest.clone(),
to_proto: proto.clone(),
text: link.desc.clone().map(String::from),
});
Ok(())
}
let link_list = links.entry(last_non_none.clone()).or_insert(Vec::new());
link_list.push(Link {
from_file: path.clone().to_string(),
from_id: last_non_none.clone(),
to: stripped_dest.clone(),
to_proto: proto.clone(),
text: link.desc.clone().map(String::from),
});
Ok(())
}
#+END_SRC
*** NEXT Attachment and image caching
@ -1020,8 +1035,8 @@ impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for ArroyoHtmlHandler<E,
self.heading_breadcrumbs.push(id.to_string());
}
}
dbg!(&self.heading_breadcrumbs);
dbg!(&self.options.limit_headings);
// dbg!(&self.heading_breadcrumbs);
// dbg!(&self.options.limit_headings);
let breadcrumb_set =
HashSet::from_iter(self.heading_breadcrumbs.clone().into_iter());
@ -1032,7 +1047,7 @@ impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for ArroyoHtmlHandler<E,
.count()
!= 0;
dbg!(self.in_public_heading);
// dbg!(self.in_public_heading);
if self.in_public_heading {
self.inner.start(w, &element)?
}

View File

@ -152,8 +152,8 @@ impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for ArroyoHtmlHandler<E,
self.heading_breadcrumbs.push(id.to_string());
}
}
dbg!(&self.heading_breadcrumbs);
dbg!(&self.options.limit_headings);
// dbg!(&self.heading_breadcrumbs);
// dbg!(&self.options.limit_headings);
let breadcrumb_set =
HashSet::from_iter(self.heading_breadcrumbs.clone().into_iter());
@ -164,7 +164,7 @@ impl<E: From<Error>, H: HtmlHandler<E>> HtmlHandler<E> for ArroyoHtmlHandler<E,
.count()
!= 0;
dbg!(self.in_public_heading);
// dbg!(self.in_public_heading);
if self.in_public_heading {
self.inner.start(w, &element)?
}

View File

@ -19,10 +19,16 @@ pub fn parse_document(path: String) -> Result<Document> {
&org,
&orgize::ParseConfig {
// Need to pull these from environment or options...
todo_keywords: (vec!["NEXT".to_string(), "INPROGRESS".to_string(), "WAITING".to_string()],
vec!["DONE".to_string(), "CANCELLED".to_string()]),
todo_keywords: (
vec![
"NEXT".to_string(),
"INPROGRESS".to_string(),
"WAITING".to_string(),
],
vec!["DONE".to_string(), "CANCELLED".to_string()],
),
..Default::default()
}
},
);
let keywords = extract_metadata(path.clone(), org_tree)?;
let headings = extract_headings(path.clone(), org_tree)?;
@ -53,192 +59,197 @@ pub fn extract_headings(path: String, tree: &Org) -> Result<Vec<Heading>> {
// Extracting Arroyo Headings:1 ends here
// [[file:../arroyo-native-parser.org::*Extracting Arroyo Headings][Extracting Arroyo Headings:2]]
let mut in_drawer: bool = false;
let mut id_crumbs: Vec<Option<String>> = Vec::new();
let mut cur_id: Option<String> = None;
let mut cur_level: usize = 0;
let mut headings: Vec<Heading> = Vec::new();
headings.push(Heading::default());
let mut links: HashMap<String, Vec<Link>> = HashMap::new();
let mut inherited_tags: Vec<Vec<String>> = Vec::new();
let mut in_drawer: bool = false;
let mut id_crumbs: Vec<Option<String>> = Vec::new();
let mut cur_id: Option<String> = None;
let mut cur_level: usize = 0;
let mut headings: Vec<Heading> = Vec::new();
headings.push(Heading::default());
let mut links: HashMap<String, Vec<Link>> = HashMap::new();
let mut inherited_tags: Vec<Vec<String>> = Vec::new();
// Extracting Arroyo Headings:2 ends here
// [[file:../arroyo-native-parser.org::*Extracting Arroyo Headings][Extracting Arroyo Headings:3]]
// file level metadata + filetags
let file_metadata = extract_metadata(path.clone(), tree)?;
let filetags = match file_metadata
.iter()
.find(|kw| kw.keyword.to_lowercase() == "filetags")
{
Some(kw) => kw
.value
.split(':')
.map(|s| s.to_string())
.filter(|s| !s.is_empty())
.collect(),
_ => Vec::<String>::new(),
};
headings[0].tags = Some(filetags.clone());
// file level metadata + filetags
let file_metadata = extract_metadata(path.clone(), tree)?;
let filetags = match file_metadata
.iter()
.find(|kw| kw.keyword.to_lowercase() == "filetags")
{
Some(kw) => kw
.value
.split(':')
.map(|s| s.to_string())
.filter(|s| !s.is_empty())
.collect(),
_ => Vec::<String>::new(),
};
headings[0].tags = Some(filetags.clone());
// Extracting Arroyo Headings:3 ends here
// [[file:../arroyo-native-parser.org::*Extracting Arroyo Headings][Extracting Arroyo Headings:4]]
// Extract document title and apply to level 0 heading
let doc_title = match file_metadata
.iter()
.find(|kw| kw.keyword.to_lowercase() == "title")
{
Some(kw) => kw.value.clone(),
_ => String::from(""),
};
headings[0].text = doc_title;
// Extract document title and apply to level 0 heading
let doc_title = match file_metadata
.iter()
.find(|kw| kw.keyword.to_lowercase() == "title")
{
Some(kw) => kw.value.clone(),
_ => String::from(""),
};
headings[0].text = doc_title;
// Extracting Arroyo Headings:4 ends here
// [[file:../arroyo-native-parser.org::*Extracting Arroyo Headings][Extracting Arroyo Headings:5]]
// state machine go brrr
tree.iter()
.map(|event| {
match event {
// state machine go brrr
tree.iter()
.map(|event| {
match event {
// Extracting Arroyo Headings:5 ends here
// [[file:../arroyo-native-parser.org::*Heading parser][Heading parser:1]]
Event::Start(orgize::Element::Title(title)) => {
let properties = title.properties.clone().into_hash_map();
cur_id = properties.get("ID").map(|id| id.clone().into());
Event::Start(orgize::Element::Title(title)) => {
let tmp_properties = title.properties.clone().into_hash_map();
let mut export_properties: HashMap<String, String> = HashMap::new();
tmp_properties.iter().for_each(|(k, v)| {
export_properties.insert(k.to_string(), v.to_string());
});
cur_id = export_properties.get("ID").cloned();
id_crumbs.truncate(cur_level + 1);
id_crumbs.push(cur_id.clone());
id_crumbs.truncate(cur_level + 1);
id_crumbs.push(cur_id.clone());
let refs = properties
.get("ROAM_REFS")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
let aliases = properties
.get("ROAM_ALIASES")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
cur_level = title.level;
let refs = export_properties
.get("ROAM_REFS")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
let aliases = export_properties
.get("ROAM_ALIASES")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
cur_level = title.level;
// reset the tags table
inherited_tags.truncate(cur_level - 1);
let new_tags: Vec<String> = title
.tags
.iter()
.map(|mbox| mbox.clone().to_string())
.collect();
inherited_tags.push(new_tags);
// reset the tags table
inherited_tags.truncate(cur_level - 1);
let new_tags: Vec<String> = title
.tags
.iter()
.map(|mbox| mbox.clone().to_string())
.collect();
inherited_tags.push(new_tags);
let most_tags = inherited_tags.concat();
let all_tags: Vec<String> = [filetags.clone(), most_tags].concat();
let most_tags = inherited_tags.concat();
let all_tags: Vec<String> = [filetags.clone(), most_tags].concat();
let h = Heading {
id: cur_id.clone(),
level: cur_level,
text: title.raw.to_string(),
tags: match all_tags.len() {
0 => None,
_ => Some(all_tags),
},
refs,
aliases,
..Default::default()
};
headings.push(h);
Ok(())
}
let h = Heading {
id: cur_id.clone(),
level: cur_level,
text: title.raw.to_string(),
tags: match all_tags.len() {
0 => None,
_ => Some(all_tags),
},
properties: export_properties,
refs,
aliases,
..Default::default()
};
headings.push(h);
Ok(())
}
// Heading parser:1 ends here
// [[file:../arroyo-native-parser.org::*File-level Property Drawer parsing][File-level Property Drawer parsing:1]]
Event::Start(orgize::Element::Drawer(drawer)) => {
in_drawer = drawer.name == "PROPERTIES" && headings[0].id.is_none();
Ok(())
}
Event::Start(orgize::Element::Drawer(drawer)) => {
in_drawer = drawer.name == "PROPERTIES" && headings[0].id.is_none();
Ok(())
}
// File-level Property Drawer parsing:1 ends here
// [[file:../arroyo-native-parser.org::*File-level Property Drawer parsing][File-level Property Drawer parsing:2]]
Event::Start(orgize::Element::Text { value }) => {
if in_drawer {
// this is where we rely on forked orgize
let (_, prop_drawer): (_, orgize::elements::PropertiesMap) =
orgize::elements::Drawer::parse_drawer_content(value)
.expect("failed to parse properties drawer");
let properties = prop_drawer.into_hash_map();
Event::Start(orgize::Element::Text { value }) => {
if in_drawer {
// this is where we rely on forked orgize
let (_, prop_drawer): (_, orgize::elements::PropertiesMap) =
orgize::elements::Drawer::parse_drawer_content(value)
.expect("failed to parse properties drawer");
let properties = prop_drawer.into_hash_map();
// update cur_id and heading 0 ID since this is
// implied to be the first drawer, but it's kind
// of :yikes: to think about it like that! we
// could be genious enough to have a floating
// PROPERTIES drawer that would muck things up
cur_id = properties.get("ID").map(|s| s.to_string());
if cur_id.is_none() {
cur_id = properties.get("CUSTOM_ID").map(|s| s.to_string())
}
// update cur_id and heading 0 ID since this is
// implied to be the first drawer, but it's kind
// of :yikes: to think about it like that! we
// could be genious enough to have a floating
// PROPERTIES drawer that would muck things up
cur_id = properties.get("ID").map(|s| s.to_string());
if cur_id.is_none() {
cur_id = properties.get("CUSTOM_ID").map(|s| s.to_string())
}
id_crumbs = vec![cur_id.clone()];
headings[0].id = cur_id.clone();
id_crumbs = vec![cur_id.clone()];
headings[0].id = cur_id.clone();
headings[0].aliases = properties
.get("ROAM_ALIASES")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
headings[0].refs = properties
.get("ROAM_REFS")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
}
headings[0].aliases = properties
.get("ROAM_ALIASES")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
headings[0].refs = properties
.get("ROAM_REFS")
.map(|s| split_quoted_string(s.to_string()).ok())
.unwrap_or(Some(vec![]));
}
if headings[0].id.is_none() {
return Err(InvalidDocError::new_err(format!(
"Root ID is None in {}",
path
)));
}
if headings[0].id.is_none() {
return Err(InvalidDocError::new_err(format!(
"Root ID is None in {}",
path
)));
}
Ok(())
}
Ok(())
}
// File-level Property Drawer parsing:2 ends here
// [[file:../arroyo-native-parser.org::*File-level Property Drawer parsing][File-level Property Drawer parsing:3]]
Event::End(orgize::Element::Drawer(_drawer)) => {
in_drawer = false;
Ok(())
}
Event::End(orgize::Element::Drawer(_drawer)) => {
in_drawer = false;
Ok(())
}
// File-level Property Drawer parsing:3 ends here
// [[file:../arroyo-native-parser.org::*Link parsing][Link parsing:1]]
// Stash links outside the match block in a HashMap shape
// of heading id -> list of links; it would be nice if the
// match block returned an Option<Link> but that doesn't
// play well with the rest of the state machine
Event::Start(orgize::Element::Link(link)) => {
let dest = link.path.to_string();
let (proto, stripped_dest): (Option<String>, String) =
match dest.split_once(':') {
Some((proto, stripped_dest)) => {
(Some(proto.to_string()), stripped_dest.to_string())
}
None => (None, dest.clone()),
};
// Stash links outside the match block in a HashMap shape
// of heading id -> list of links; it would be nice if the
// match block returned an Option<Link> but that doesn't
// play well with the rest of the state machine
Event::Start(orgize::Element::Link(link)) => {
let dest = link.path.to_string();
let (proto, stripped_dest): (Option<String>, String) =
match dest.split_once(':') {
Some((proto, stripped_dest)) => {
(Some(proto.to_string()), stripped_dest.to_string())
}
None => (None, dest.clone()),
};
let last_non_none = match id_crumbs.iter().rev().find_map(|x| x.clone()) {
Some(last_non_none) => last_non_none,
None => {
return Err(InvalidDocError::new_err(format!(
"no non-none ID in {}",
path
)));
}
};
let last_non_none = match id_crumbs.iter().rev().find_map(|x| x.clone()) {
Some(last_non_none) => last_non_none,
None => {
return Err(InvalidDocError::new_err(format!(
"no non-none ID in {}",
path
)));
}
};
let link_list = links.entry(last_non_none.clone()).or_insert(Vec::new());
link_list.push(Link {
from_file: path.clone().to_string(),
from_id: last_non_none.clone(),
to: stripped_dest.clone(),
to_proto: proto.clone(),
text: link.desc.clone().map(String::from),
});
Ok(())
}
let link_list = links.entry(last_non_none.clone()).or_insert(Vec::new());
link_list.push(Link {
from_file: path.clone().to_string(),
from_id: last_non_none.clone(),
to: stripped_dest.clone(),
to_proto: proto.clone(),
text: link.desc.clone().map(String::from),
});
Ok(())
}
// Link parsing:1 ends here
// [[file:../arroyo-native-parser.org::*Cleaning up][Cleaning up:1]]

View File

@ -2,6 +2,7 @@
use pyo3::exceptions::PyException;
use pyo3::prelude::*;
use pyo3::pyclass;
use std::collections::HashMap;
use std::fmt;
@ -106,6 +107,8 @@ pub struct Heading {
#[pyo3(get)]
pub text: String,
#[pyo3(get)]
pub properties: HashMap<String, String>,
#[pyo3(get)]
pub tags: Option<Vec<String>>,
#[pyo3(get)]
pub refs: Option<Vec<String>>,
@ -120,13 +123,14 @@ impl fmt::Display for Heading {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"Heading(id={}, title={}, {} tags, {} refs, {} aliases, {} links)",
"Heading(id={}, title={}, {} tags, {} refs, {} aliases, {} links, props {:?})",
self.id.clone().unwrap_or("None".to_owned()),
self.text,
self.tags.clone().unwrap_or(vec![]).len(),
self.refs.clone().unwrap_or(vec![]).len(),
self.aliases.clone().unwrap_or(vec![]).len(),
self.links.clone().unwrap_or(vec![]).len(),
self.properties.clone(),
)
}
}