replace dangling roam:links with 404 pages
parent
fb4c579d74
commit
ff83d0fae7
|
@ -61,21 +61,31 @@ end
|
|||
|
||||
** Normalizing HTML URLs
|
||||
|
||||
The arcology should route all internal URLs, rewriting any which are not exposed via =ARCOLOGY_KEY= to the [[file:404.org][404 page]]
|
||||
The arcology should route all internal URLs, rewriting any which are not exposed via =ARCOLOGY_KEY= to the [[file:404.org][404 page]]. URLs prefixed with =roam:= are dangling links in my [[file:../knowledge_base.org][Knowledge Base]]: they would otherwise have been rewritten to a real page, so they are instead turned into a dead link pointing at the 404 page. I'd like to include some statistics for these at some point so I can have some awareness of the pages people are interested in, but that's not important right now.
|
||||
|
||||
#+begin_src elixir :noweb-ref normalize_urls
|
||||
@doc "Rewrite the HTML anchors in the passed binary from org links to local-routing HTML links"
|
||||
def normalize_urls(input_html) when is_binary(input_html) do
|
||||
Regex.replace(
|
||||
intermediate = Regex.replace(
|
||||
~r/<a href="([0-9a-zA-Z_\- \/]+.org)">/,
|
||||
input_html,
|
||||
&normalize_url(&1, &2)
|
||||
&normalize_org_url(&1, &2)
|
||||
)
|
||||
|
||||
Regex.replace(
|
||||
~r/<a href="(roam:[0-9a-zA-Z_\- \/,.!?]+)">roam:/,
|
||||
intermediate,
|
||||
&normalize_roam_url(&1, &2)
|
||||
)
|
||||
end
|
||||
|
||||
defp normalize_url(_match, page) do
|
||||
defp normalize_roam_url(_match, page) do
|
||||
~s(<a href="/arcology/404.html" class="dead-link">)
|
||||
end
|
||||
|
||||
defp normalize_org_url(_match, page) do
|
||||
# arc_dir = Application.get_env(:arcology, :env)[:arcology_directory]
|
||||
key = Arcology.Roam.Keyword.from_file(page, "ARCOLOGY_KEY")
|
||||
key = Arcology.Roam.Keyword.from_file(page|>IO.inspect, "ARCOLOGY_KEY")
|
||||
|
||||
cond do
|
||||
key == nil ->
|
||||
|
@ -88,7 +98,7 @@ defp normalize_url(_match, page) do
|
|||
end
|
||||
|
||||
@doc "This is for tests, ignore the man behind the curtain"
|
||||
def test_normalize(page), do: normalize_url(nil, page)
|
||||
def test_normalize(page), do: normalize_org_url(nil, page)
|
||||
#+end_src
|
||||
|
||||
=normalize_org_url/2= is called by the first =Regex.replace= in =normalize_urls/1=. The second argument is all we care about: it comes out of the capture in the expression and is, in all likelihood, the string of the relative file name. This is not really consistent, and it *is* something that I need to resolve in my [[file:../cce/org-roam.org][org-roam]] and [[file:../org-mode.org][org-mode]] configurations. This is going to have a lot of data-validity issues, and I kind of wish that it was easier to quickly get an [[file:arcology_roam.org][Arcology.Roam.Link]] from this =page= variable to get to a [[file:arcology_roam.org][Arcology.Roam.Keyword]].
|
||||
|
|
Loading…
Reference in New Issue