;; 226 lines
;; 8.5 KiB

(local file (require :pl.file))
(local lapp (require :pl.lapp))
(local path (require :pl.path))
(local pretty (require :pl.pretty))
(local stringx (require :pl.stringx))
(local tablex (require :pl.tablex))
(local text (require :pl.text))
(local wallabag (require :wallabag))
(local api-prefix wallabag.api-prefix)
(local sha256 (require :hashings.sha256))
(fn collect-highlights [base-path match-name converter-fn]
  "Run `find` under BASE-PATH for regular files whose path matches the
posix-egrep regex MATCH-NAME. Each hit is loaded as a Lua chunk; the chunk's
result and the file path are passed to CONVERTER-FN.

Returns a sequential table of the non-nil values CONVERTER-FN returned."
  ;; NOTE(review): base-path and match-name are spliced into the shell command
  ;; unescaped (base-path is left unquoted so the shell can expand a leading
  ;; `~`); only pass trusted values.
  (let [cmd (.. "bash -c 'find " base-path
                " -regextype posix-egrep -regex \"" match-name "\" -type f'")
        proc (assert (io.popen cmd))
        ;; Drain the pipe up front so the handle is closed before any
        ;; converter gets a chance to raise.
        found (icollect [line (proc:lines)] line)]
    (proc:close)
    (icollect [_ md-path (ipairs found)]
      (match (loadfile md-path)
        ;; loadfile reports failure as nil plus a message; skip that file
        ;; instead of crashing on a call to nil.
        (nil err) (do
                    (print "Could not load" md-path err)
                    nil)
        chunk (converter-fn (chunk) md-path)))))
(fn init-datastructure [md-path from-md]
  "Build the initial per-book record for the metadata table FROM-MD that was
loaded from MD-PATH. The record carries path, authors, title, series, md5 and
an empty highlights list; md5 is stats.md5 when present, otherwise
partial_md5_checksum."
  (local doc-props (?. from-md :doc_props))
  (local book-stats (?. from-md :stats))
  (local checksum (or (?. book-stats :md5)
                      (?. from-md :partial_md5_checksum)))
  {:path md-path
   :authors (.. (?. doc-props :authors))
   :title (?. doc-props :title)
   :series (?. doc-props :series)
   :md5 checksum
   :highlights []})
(fn sort-by-datetime [first second]
  "Comparator: true when FIRST's datetime field orders before SECOND's."
  (let [lhs first.datetime
        rhs second.datetime]
    (< lhs rhs)))
(fn sort-by-page-no [first second]
  "Comparator: true when FIRST's page field orders before SECOND's."
  (let [lhs first.page
        rhs second.page]
    (< lhs rhs)))
(fn find-chapter-by-name [highlights name]
  "Search HIGHLIGHTS with tablex.find_if for an entry whose name field equals
NAME and return whatever find_if returns for it."
  (fn named? [entry]
    (= entry.name name))
  (tablex.find_if highlights named?))
(fn process-one-book [metadata file-name]
  "Convert one loaded metadata table METADATA (read from FILE-NAME) into a book
record whose highlights are grouped per chapter.

Bookmarks are visited in datetime order. Each is appended to the chapter entry
in output.highlights that carries its chapter name (an empty string when the
bookmark has none); the entry is created on first use. Returns the record."
  (fn mk-highlight [chapter highlight-tbl]
    ;; Normalised view of one bookmark; text falls back to the notes field.
    {:datetime (?. highlight-tbl :datetime)
     :chapter chapter
     :locs [(?. highlight-tbl :pos0)
            (?. highlight-tbl :pos1)]
     :text (or (?. highlight-tbl :text)
               (?. highlight-tbl :notes))})
  ;; Metadata without a bookmarks table is treated as having no highlights
  ;; rather than crashing the sort.
  (let [bookmarks (or (?. metadata :bookmarks) {})
        output (init-datastructure file-name metadata)]
    (print "processing..." (?. (?. metadata "doc_props") "title"))
    (let [sum (accumulate [total 0
                           _ inner-tbl (tablex.sortv bookmarks sort-by-datetime)]
                (let [chapter (or (?. inner-tbl "chapter") "")
                      ;; Index of the chapter entry, appending a fresh one
                      ;; when this chapter name has not been seen yet.
                      chapter-idx (or (find-chapter-by-name output.highlights chapter)
                                      (do
                                        (table.insert output.highlights {:name chapter})
                                        (length output.highlights)))]
                  (table.insert (. output.highlights chapter-idx)
                                (mk-highlight chapter inner-tbl))
                  (+ total 1)))]
      (print "parsed" sum "highlights")
      output)))
;; Token obtained once, when this file is loaded, from wallabag.get-token.
;; NOTE(review): ".wallabag" is presumably a credentials/config file name;
;; confirm against the wallabag module.
(local wallabag-token
  (let [token-url (.. api-prefix "/oauth/v2/token")]
    (wallabag.get-token token-url ".wallabag")))
(fn get-single-entry-from-wallabag [id]
  "Issue GET <api-prefix>/api/entries/ID.json through wallabag.api-req using the
module-level token and return the response body."
  (let [(_headers body) (wallabag.api-req wallabag-token "GET"
                                          (.. api-prefix "/api/entries/" id ".json"))]
    ;; NOTE(review): assumes api-req hands back an already decoded table;
    ;; get-wallabag-url reads the url field from this value. Confirm against
    ;; the wallabag module.
    body))
(fn get-wallabag-url [id]
  "Fetch wallabag entry ID and return its url field."
  (let [entry (get-single-entry-from-wallabag id)]
    entry.url))
;; Shorthand for the Template constructor exported by pl.text.
(local template text.Template)
;; Org template for one highlight: a second-level heading holding the text,
;; followed by a property drawer with the id, location and page fields that
;; render-one-highlight supplies.
(local highlight-tmpl (template
"** ${text}
:PROPERTIES:
:ID: ${id}
:LOC0: ${loc0}
:LOC1: ${loc1}
:PAGE: ${page}
:END:
"))
(fn render-one-highlight [hl]
  "Render the highlight table HL through highlight-tmpl and return the result.

The id is the sha256 hex digest of the highlight text concatenated with its
datetime. When the first loc is a table the page field is filled and the two
loc fields are left empty; otherwise the locs are emitted and page is empty."
  (let [locs (or (. hl :locs) [])
        ;; Missing text or datetime renders as empty instead of raising on
        ;; concatenation.
        raw-text (or (. hl :text) "")
        stamp (or (. hl :datetime) "")
        digest (: sha256 :new (.. raw-text stamp))
        hexdigest (: digest :hexdigest)
        fields (tablex.update
                {:datetime (. hl :datetime)
                 ;; Put a space after every `$` and fold newlines so the text
                 ;; stays on the single heading line.
                 :text (-> raw-text
                           (: :gsub "%$" "$ ")
                           (: :gsub "\n" "¶ "))
                 :id hexdigest}
                (if (= (type (. locs 1)) "table")
                    {:page (or (?. hl :page) (?. (. locs 1) :page) "")
                     :loc0 ""
                     :loc1 ""}
                    {:page ""
                     :loc0 (or (. locs 1) "")
                     :loc1 (or (. locs 2) "")}))]
    (: highlight-tmpl :substitute fields)))
;; Org template for a chapter: a first-level heading carrying the chapter name.
(local chapter-tmpl (template
"* ${chapter}
"))
(fn render-one-chapter [chapter]
  "Render CHAPTER as its heading followed by every highlight stored in its
array part, joined with newlines. CHAPTER itself is left untouched; the work
happens on a deep copy with the name field removed."
  (let [heading (: chapter-tmpl :substitute {:chapter (?. chapter :name)})
        entries (tablex.deepcopy chapter)]
    (set entries.name nil)
    (let [rendered (icollect [_ hl (ipairs entries)]
                     (render-one-highlight hl))]
      (stringx.join "\n" (tablex.insertvalues [heading] rendered)))))
;; Org file header for one book: a file-level property drawer with the id and
;; source reference, followed by the title and authors keywords.
(local book-tmpl (template
":PROPERTIES:
:ID: koreader-${md5}
:ROAM_REFS: \"${path}\"
:END:
#+TITLE: Notes from ${title}
#+AUTHORS: ${authors}
"))
(fn munge-book-path [book-md]
  "Rewrite the path field of BOOK-MD in place into the reference the notes
should point at.

A path containing a `[w-id_<digits>]` marker is replaced by the url of that
wallabag entry. Any other path has its `sdr/metadata.<ext>.lua` tail collapsed
to `<ext>` and gets a `file:` prefix."
  (let [md-path (. book-md :path)
        (_ _ bag-id) (string.find md-path "%[w%-id_(%d+)%]")]
    (if bag-id
        ;; Both statements belong to the wallabag branch; without the grouping
        ;; the second one would be parsed as the else branch.
        (do
          (print "bag id" bag-id)
          (set book-md.path (get-wallabag-url bag-id)))
        ;; sickos.jpg
        (set book-md.path (.. "file:"
                              (string.gsub md-path "sdr/metadata.([^.]+).lua" "%1"))))))
(fn render-one-book [book]
  "Return the full org text for BOOK: the book header followed by every
chapter, joined with newlines.

Calls munge-book-path first, so the path field of BOOK is rewritten in place
before the header is rendered."
  (munge-book-path book)
  (.. (: book-tmpl :substitute book)
      (stringx.join "\n"
                    ;; highlights is a sequence; ipairs keeps the chapters in
                    ;; insertion order, which pairs does not guarantee.
                    (icollect [_ chapter-hls (ipairs (?. book "highlights"))]
                      (render-one-chapter chapter-hls)))))
(fn maybe-write-to-file [src-file dest-file render-fn]
  "Write the string returned by RENDER-FN to DEST-FILE (with `~` expanded) when
the destination is missing or older than SRC-FILE; otherwise print a skip
message. RENDER-FN is only called when a write is attempted."
  (let [dest-file2 (path.expanduser dest-file)
        book-mtime (file.modified_time src-file)
        notes-mtime (file.modified_time dest-file2)]
    (print "Targeting..." dest-file2)
    ;; A missing mtime on either side means freshness cannot be established,
    ;; so render rather than compare a number against nil.
    (if (or (not notes-mtime)
            (not book-mtime)
            (< notes-mtime book-mtime))
        ;; Render before opening: opening for write truncates the file, and a
        ;; failing render must not wipe existing notes.
        (let [rendered (render-fn)]
          ;; io.open reports failure as nil plus a message, whereas io.output
          ;; raises, so only io.open can reach the error branch below.
          (match (io.open dest-file2 :w)
            (nil msg) (print "Could not write file... " msg)
            f (do
                (f:write rendered)
                (f:close)
                (print "Rendered..." (length rendered)))))
        (print "Skipping ... " dest-file2))))
(fn write-one-book [book out-dir]
  "Render BOOK into OUT-DIR as `<title>.org`, with colons, slashes, backslashes
and spaces in the title replaced by underscores. The write is delegated to
maybe-write-to-file, using the current path field of BOOK as the source."
  (let [book-path (?. book :path)
        title (?. book :title)
        notes-path (path.join out-dir (.. (: title :gsub "[:/\\ ]" "_") ".org"))]
    (print "Maybe rendering" book-path)
    (print "to" notes-path)
    ;; maybe-write-to-file takes source, destination, then the renderer.
    (maybe-write-to-file book-path
                         notes-path
                         (lambda []
                           (print "rendering"
                                  (accumulate [sum 0 _ chap (ipairs (. book :highlights))]
                                    (+ sum (length chap)))
                                  "highlights to string")
                           (render-one-book book)))))
(fn write-one-book-from-md [md filename out-dir]
  "Convert the loaded metadata table MD (read from FILENAME) into a book record,
write its notes into OUT-DIR and return the record."
  (let [book (process-one-book md filename)]
    (write-one-book book out-dir)
    book))
(fn collect-epub-highlights [books-path out-path]
  "Process every epub or mobi metadata sidecar found under BOOKS-PATH, writing
the resulting notes into OUT-PATH."
  (local convert
         (lambda [book book-path]
           (write-one-book-from-md book book-path out-path)))
  (collect-highlights books-path ".*sdr/metadata.(epub|mobi).lua" convert))
(fn collect-pdf-highlights [books-path out-path]
  "Process every pdf metadata sidecar found under BOOKS-PATH, writing the
resulting notes into OUT-PATH."
  (local convert
         (lambda [book book-path]
           (write-one-book-from-md book book-path out-path)))
  (collect-highlights books-path ".*sdr/metadata.pdf.lua" convert))
;; Entry point: parse the command line, then either convert the single file
;; given with --file or scan the epub and pdf directories.
(let [default-book-dir "~/mobile-library/"
      default-note-dir "~/org/highlights/"
      args (lapp (stringx.join
                  "\n"
                  ["Parse koreader metadata files in to org-mode notes"
                   "-f,--file (optional string) only parse one metadata.*.lua file"
                   (.. "-e,--epubs (optional string) parse epubs from here, otherwise " default-book-dir)
                   (.. "-p,--pdfs (optional string) parse pdfs from here, otherwise " default-book-dir)
                   (.. "-n,--notes (optional string) write outputs to directory: " default-note-dir)]))
      file-name (?. args :file)
      file? (not (not file-name))
      epub-dir (or (. args :epubs) default-book-dir)
      pdf-dir (or (. args :pdfs) default-book-dir)
      notes-dir (or (. args :notes) default-note-dir)]
  (if file?
      ;; assert surfaces the loadfile error message instead of failing on a
      ;; call to nil.
      (write-one-book-from-md ((assert (loadfile file-name))) file-name notes-dir)
      ;; Both scans belong to the else branch; without the grouping the pdf
      ;; scan would only run as the consequent of the epub scan's result.
      (do
        (collect-epub-highlights epub-dir notes-dir)
        (collect-pdf-highlights pdf-dir notes-dir))))