initial import

rrix
Simon Lipp 2014-03-15 11:16:27 +01:00
commit 78b8a6a447
32 changed files with 2071 additions and 0 deletions

5
.gitignore vendored Normal file
View File

@ -0,0 +1,5 @@
ggs/ggs
maildir-put/maildir-put
rss2json/rss2json
ua-inline/ua-inline
__pycache__

13
COPYING Normal file
View File

@ -0,0 +1,13 @@
Copyright (c) 2014, Simon Lipp
Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

49
Makefile Normal file
View File

@ -0,0 +1,49 @@
# Build and install the Universal Aggregator tools.
#
# Overridable variables (e.g. `make PREFIX=/usr DESTDIR=/tmp/pkg install`):
#   PREFIX     installation prefix
#   DESTDIR    staging root prepended to every installation path
#   PYTHONVER  python3 version as reported by pkg-config; empty when
#              pkg-config cannot find python3
PREFIX=/usr/local
DESTDIR=
PYTHONVER=$(shell pkg-config --modversion python3 2>/dev/null)
BINDIR=$(DESTDIR)$(PREFIX)/bin
PYLIBDIR=$(DESTDIR)$(PREFIX)/lib/python$(PYTHONVER)/site-packages
DOCDIR=$(DESTDIR)$(PREFIX)/share/doc/ua
MANDIR=$(DESTDIR)$(PREFIX)/share/man

# Each directory holds one Go program whose binary has the directory's name.
GODIRS=ggs rss2json maildir-put ua-inline

# `doc` is also a directory name and `install` never creates a file called
# "install", so both must be declared phony to always run.
.PHONY: all clean doc install

all: ggs/ggs rss2json/rss2json maildir-put/maildir-put ua-inline/ua-inline

# Regenerate the man pages and HTML pages with ronn. The doc/*.md entries are
# symlinks to the README.md files and are created only when missing.
doc:
	test -d doc || mkdir doc
	test -f doc/ua.md || ln -s ../README.md doc/ua.md
	for d in $(GODIRS) ; do test -f doc/$$d.md || ln -s ../$$d/README.md doc/$$d.md ; done
	cd doc && for f in *.md ; do ronn $$f ; done

ggs/ggs: ggs/ggs.go
	cd ggs && go build

rss2json/rss2json: rss2json/rss2json.go
	cd rss2json && go build

maildir-put/maildir-put: maildir-put/maildir-put.go maildir-put/cache.go
	cd maildir-put && go build

ua-inline/ua-inline: ua-inline/ua-inline.go
	cd ua-inline && go build

# Install binaries, scrapers, documentation and man pages.
# The Python helper library is installed only when PYTHONVER is non-empty;
# the `if` form keeps the recipe line successful when it is empty, whereas
# `test -n "" && ...` would return a failure status and abort the install.
# `|| exit 1` makes a failure inside a loop stop the recipe instead of being
# masked by the next iteration.
install: all
	install -d $(BINDIR)
	for f in $(GODIRS) ; do install $$f/$$f $(BINDIR)/ || exit 1 ; done
	install scrappers/mangareader2json $(BINDIR)/
	install scrappers/ipboard2json $(BINDIR)/
	if test -n "$(PYTHONVER)" ; then \
		install -d $(PYLIBDIR) && \
		install scrappers/scraplib.py $(PYLIBDIR)/ ; \
	fi
	install -d $(DOCDIR)
	install -d $(MANDIR)/man1/
	install ggsrc.example $(DOCDIR)
	for f in doc/*.md doc/*.html ; do install $$f $(DOCDIR)/ || exit 1 ; done
	for f in $(GODIRS) ; do gzip < doc/$$f > $(MANDIR)/man1/$$f.1.gz || exit 1 ; done

# Remove the built Go binaries only; generated documentation is kept.
clean:
	for f in $(GODIRS) ; do rm -f $$f/$$f ; done

59
README.md Normal file
View File

@ -0,0 +1,59 @@
# The Universal Aggregator
This is a set of tools to aggregate all your information into your
maildir. Each tool can be used separately; you can find a more complete
description of each one in its respective folder.
* `ggs` is a software which runs commands periodically
* `maildir-put` reads a set of messages from its standard input and puts
them in a maildir
* `rss2json` transforms any RSS/Atom feed into a set of messages that
`maildir-put` can process
* You can write your own producers for maildir-put ; an example for the
[mangareader](http://mangareader.net) service is provided.
* You can also put filters, like `ua-inline`
## Usage
ggs [path-to-configuration-file]
## Dependencies
* Go
* libxml
* For additional scrappers: python 3, aiohttp and pyquery
## Installation
make && sudo make install
## Configuration
See the `ggs` documentation for more information. Here is a sample
configuration file, which puts some feeds into `Fun` and `Geek` folders,
some new chapters notification from mangareader into `Entertainment`,
and my Github personal feed into inbox:
default_timeout=30
rss() {
command 2000 "rss2json \"$1\" | ua-inline | maildir-put -root $HOME/Maildir-feeds -folder \"$2\""
}
mangareader() {
command 2000 "mangareader2json http://mangareader.net/$1 | "\
"maildir-put -root $HOME/Maildir-feeds -folder Entertainment"
}
rss http://xkcd.com/atom.xml Fun
rss http://feeds.feedburner.com/smbc-comics/PvLb Fun
rss http://syndication.thedailywtf.com/TheDailyWtf Fun
rss http://www.reddit.com/r/science/top/.rss Geek
rss http://www.phoronix.com/rss.php Geek
mangareader naruto
mangareader bleach
mangareader gantz
rss https://github.com/sloonz.private.atom?token=HIDDEN ""

58
doc/ggs Normal file
View File

@ -0,0 +1,58 @@
.\" generated with Ronn/v0.7.3
.\" http://github.com/rtomayko/ronn/tree/0.7.3
.
.TH "GGS" "" "March 2014" "" ""
\fBGGS\fR (Grey Goo Spawner) is a simple program that runs jobs periodically\. It is similar to cron, but with some differences:
.
.IP "\(bu" 4
Whereas \fBcron\fR launches jobs at specific times, \fBggs\fR is mainly interested in intervals\. It will run all jobs at its startup and then will re\-run each job after a certain delay has passed\.
.
.IP "\(bu" 4
\fBggs\fR has a system of \fBworkers\fR, similar to many servers (like nginx or Apache with MPM Workers) to limit resource contention between your jobs\.
.
.IP "\(bu" 4
You can define a timeout for your jobs, too\.
.
.IP "" 0
.
.SH "Usage"
\fBggs [configuration file]\fR
.
.P
If no configuration file is provided, \fBggs\fR will use \fB~/\.config/ggsrc\fR by default\.
.
.SH "Installation"
\fBgo build ggs\.go && cp ggs /usr/local/bin\fR
.
.SH "Configuration"
Configuration file is a shell script, so same rule as \fBsh\fR applies\.
.
.P
You create a job with the \fBcommand\fR function, which takes two arguments: the delay between launches, and the command to run\. You can specify a timeout (in seconds) by setting the \fBtimeout\fR environment variable (optional; default: 0, meaning no timeout)\.
.
.IP "" 4
.
.nf
timeout=30 command 300 "uptime | mail admin@example\.com"
command 5 \'ping \-c 1 github\.com || sudo halt \-p\'
.
.fi
.
.IP "" 0
.
.P
You can also set the number of workers (maximum number of jobs that can run simultaneously):
.
.IP "" 4
.
.nf
workers=5 #Warning: do not write "workers = 5", spaces matter here!
.
.fi
.
.IP "" 0
.
.SH "Advanced configuration"
The configuration file is just a shell script which produces a JSON document which matches the \fBConfig\fR structure\. You can do \fBexec my_script\fR to produce the same JSON with a script in your favorite language\. You can also use variables, functions, execute external commands, and so on\.\.\.

130
doc/ggs.html Normal file
View File

@ -0,0 +1,130 @@
<!DOCTYPE html>
<html>
<head>
<meta http-equiv='content-type' value='text/html;charset=utf8'>
<meta name='generator' value='Ronn/v0.7.3 (http://github.com/rtomayko/ronn/tree/0.7.3)'>
<title>GGS</title>
<style type='text/css' media='all'>
/* style: man */
body#manpage {margin:0}
.mp {max-width:100ex;padding:0 9ex 1ex 4ex}
.mp p,.mp pre,.mp ul,.mp ol,.mp dl {margin:0 0 20px 0}
.mp h2 {margin:10px 0 0 0}
.mp > p,.mp > pre,.mp > ul,.mp > ol,.mp > dl {margin-left:8ex}
.mp h3 {margin:0 0 0 4ex}
.mp dt {margin:0;clear:left}
.mp dt.flush {float:left;width:8ex}
.mp dd {margin:0 0 0 9ex}
.mp h1,.mp h2,.mp h3,.mp h4 {clear:left}
.mp pre {margin-bottom:20px}
.mp pre+h2,.mp pre+h3 {margin-top:22px}
.mp h2+pre,.mp h3+pre {margin-top:5px}
.mp img {display:block;margin:auto}
.mp h1.man-title {display:none}
.mp,.mp code,.mp pre,.mp tt,.mp kbd,.mp samp,.mp h3,.mp h4 {font-family:monospace;font-size:14px;line-height:1.42857142857143}
.mp h2 {font-size:16px;line-height:1.25}
.mp h1 {font-size:20px;line-height:2}
.mp {text-align:justify;background:#fff}
.mp,.mp code,.mp pre,.mp pre code,.mp tt,.mp kbd,.mp samp {color:#131211}
.mp h1,.mp h2,.mp h3,.mp h4 {color:#030201}
.mp u {text-decoration:underline}
.mp code,.mp strong,.mp b {font-weight:bold;color:#131211}
.mp em,.mp var {font-style:italic;color:#232221;text-decoration:none}
.mp a,.mp a:link,.mp a:hover,.mp a code,.mp a pre,.mp a tt,.mp a kbd,.mp a samp {color:#0000ff}
.mp b.man-ref {font-weight:normal;color:#434241}
.mp pre {padding:0 4ex}
.mp pre code {font-weight:normal;color:#434241}
.mp h2+pre,h3+pre {padding-left:0}
ol.man-decor,ol.man-decor li {margin:3px 0 10px 0;padding:0;float:left;width:33%;list-style-type:none;text-transform:uppercase;color:#999;letter-spacing:1px}
ol.man-decor {width:100%}
ol.man-decor li.tl {text-align:left}
ol.man-decor li.tc {text-align:center;letter-spacing:4px}
ol.man-decor li.tr {text-align:right;float:right}
</style>
</head>
<!--
The following styles are deprecated and will be removed at some point:
div#man, div#man ol.man, div#man ol.head, div#man ol.man.
The .man-page, .man-decor, .man-head, .man-foot, .man-title, and
.man-navigation should be used instead.
-->
<body id='manpage'>
<div class='mp' id='man'>
<div class='man-navigation' style='display:none'>
<a href="#Usage">Usage</a>
<a href="#Installation">Installation</a>
<a href="#Configuration">Configuration</a>
<a href="#Advanced-configuration">Advanced configuration</a>
</div>
<ol class='man-decor man-head man head'>
<li class='tl'>ggs</li>
<li class='tc'></li>
<li class='tr'>ggs</li>
</ol>
<h1>GGS</h1>
<p><code>GGS</code> (Grey Goo Spawner) is a simple program that runs jobs
periodically. It is similar to cron, but with some differences:</p>
<ul>
<li><p>Whereas <code>cron</code> launches jobs at specific times, <code>ggs</code> is mainly
interested in intervals. It will run all jobs at its startup and then
will re-run each job after a certain delay has passed.</p></li>
<li><p><code>ggs</code> has a system of <code>workers</code>, similar to many servers (like nginx
or Apache with MPM Workers) to limit resource contention between your
jobs.</p></li>
<li><p>You can define a timeout for your jobs, too.</p></li>
</ul>
<h2 id="Usage">Usage</h2>
<p><code>ggs [configuration file]</code></p>
<p>If no configuration file is provided, <code>ggs</code> will use <code>~/.config/ggsrc</code>
by default.</p>
<h2 id="Installation">Installation</h2>
<p><code>go build ggs.go &amp;&amp; cp ggs /usr/local/bin</code></p>
<h2 id="Configuration">Configuration</h2>
<p>Configuration file is a shell script, so same rule as <code>sh</code> applies.</p>
<p>You create a job with the <code>command</code> function, which takes two arguments:
the delay between launches, and the command to run. You can specify a
timeout (in seconds) by setting the <code>timeout</code> environment variable
(optional; default: 0, meaning no timeout).</p>
<pre><code>timeout=30 command 300 "uptime | mail admin@example.com"
command 5 'ping -c 1 github.com || sudo halt -p'
</code></pre>
<p>You can also set the number of workers (maximum number of jobs that can
run simultaneously):</p>
<pre><code>workers=5 #Warning: do not write "workers = 5", spaces matter here!
</code></pre>
<h2 id="Advanced-configuration">Advanced configuration</h2>
<p>The configuration file is just a shell script which produces a JSON
document which matches the <code>Config</code> structure. You can do
<code>exec my_script</code> to produce the same JSON with a script in your favorite
language. You can also use variables, functions, execute external
commands, and so on...</p>
<ol class='man-decor man-foot man foot'>
<li class='tl'></li>
<li class='tc'>March 2014</li>
<li class='tr'>ggs</li>
</ol>
</div>
</body>
</html>

1
doc/ggs.md Symbolic link
View File

@ -0,0 +1 @@
../ggs/README.md

70
doc/maildir-put Normal file
View File

@ -0,0 +1,70 @@
.\" generated with Ronn/v0.7.3
.\" http://github.com/rtomayko/ronn/tree/0.7.3
.
.TH "MAILDIR\-PUT" "" "March 2014" "" ""
\fBmaildir\-put\fR is a tool to put messages in a predefined JSON format inside a maildir\. It also tries to detect duplicates and drops them\.
.
.SH "Usage"
.
.nf
message\-producer | maildir\-put [arguments]
.
.fi
.
.P
Available arguments:
.
.IP "\(bu" 4
\fI\-cache\fR: path to a cache file used to store message IDs for duplicate detection
.
.IP "\(bu" 4
\fI\-root\fR: path to the root maildir directory\. Defaults to ~/Maildir\.
.
.IP "\(bu" 4
\fI\-folder\fR: maildir folder to put messages\. Defaults to "", the inbox\. The folder separator is "/"\.
.
.IP "" 0
.
.SH "Installation"
.
.nf
go build && cp maildir\-put /usr/local/bin
.
.fi
.
.SH "Input format"
As its input, \fBmaildir\-put\fR takes a stream of JSON dictionaries (not a list of dictionaries)\. Each dictionary represents a message\. Available keys are:
.
.IP "\(bu" 4
\fIbody\fR: the body of the message, in HTML\. Mandatory\.
.
.IP "\(bu" 4
\fItitle\fR: the subject of the message, in text\. Mandatory\.
.
.IP "\(bu" 4
\fIdate\fR: the date of the message\. Optional, defaults to current time\. If provided, must be RFC 2822 compliant\.
.
.IP "\(bu" 4
\fIauthor\fR: the name of the author, in text\. Optional\.
.
.IP "\(bu" 4
\fIauthorEmail\fR: the email address of the author\. Optional\.
.
.IP "\(bu" 4
\fIid\fR: a unique identifier for the message\. It will be used for the creation of the Message\-Id header, as well as in duplicate detection\. It should include three parts: a unique identifier for the application (for example: \fBrss2json\fR), a unique identifier for the parameters (for example: the feed URL) and a unique identifier for the message (for example: an article ID)\. The identifier for the parameters may be omitted if you provide a \fIhost\fR key and the host is sufficient to identify the parameters\. Mandatory for threaded discussion handling and duplicate detection, optional otherwise\.
.
.IP "\(bu" 4
\fIhost\fR: the domain name of the producer of the message (in general, the hostname of the server from which you fetched the information)\. Used in the construction of the \fBMessage\-Id\fR and \fBReferences\fR headers, as well as in duplicate detection\. Optional, but strongly encouraged for threaded discussion handling and duplicate detection\.
.
.IP "\(bu" 4
\fIreferences\fR: for threaded discussions, \fIid\fR of the parent messages\. Note that \fIhost\fR must match in the two messages\.
.
.IP "\(bu" 4
\fIurl\fR: URL of the message\. Used by \fBua\-inline\fR to resolve relative references\.
.
.IP "" 0
.
.P
All strings must be encoded in UTF\-8\.

137
doc/maildir-put.html Normal file
View File

@ -0,0 +1,137 @@
<!DOCTYPE html>
<html>
<head>
<meta http-equiv='content-type' value='text/html;charset=utf8'>
<meta name='generator' value='Ronn/v0.7.3 (http://github.com/rtomayko/ronn/tree/0.7.3)'>
<title>maildir-put</title>
<style type='text/css' media='all'>
/* style: man */
body#manpage {margin:0}
.mp {max-width:100ex;padding:0 9ex 1ex 4ex}
.mp p,.mp pre,.mp ul,.mp ol,.mp dl {margin:0 0 20px 0}
.mp h2 {margin:10px 0 0 0}
.mp > p,.mp > pre,.mp > ul,.mp > ol,.mp > dl {margin-left:8ex}
.mp h3 {margin:0 0 0 4ex}
.mp dt {margin:0;clear:left}
.mp dt.flush {float:left;width:8ex}
.mp dd {margin:0 0 0 9ex}
.mp h1,.mp h2,.mp h3,.mp h4 {clear:left}
.mp pre {margin-bottom:20px}
.mp pre+h2,.mp pre+h3 {margin-top:22px}
.mp h2+pre,.mp h3+pre {margin-top:5px}
.mp img {display:block;margin:auto}
.mp h1.man-title {display:none}
.mp,.mp code,.mp pre,.mp tt,.mp kbd,.mp samp,.mp h3,.mp h4 {font-family:monospace;font-size:14px;line-height:1.42857142857143}
.mp h2 {font-size:16px;line-height:1.25}
.mp h1 {font-size:20px;line-height:2}
.mp {text-align:justify;background:#fff}
.mp,.mp code,.mp pre,.mp pre code,.mp tt,.mp kbd,.mp samp {color:#131211}
.mp h1,.mp h2,.mp h3,.mp h4 {color:#030201}
.mp u {text-decoration:underline}
.mp code,.mp strong,.mp b {font-weight:bold;color:#131211}
.mp em,.mp var {font-style:italic;color:#232221;text-decoration:none}
.mp a,.mp a:link,.mp a:hover,.mp a code,.mp a pre,.mp a tt,.mp a kbd,.mp a samp {color:#0000ff}
.mp b.man-ref {font-weight:normal;color:#434241}
.mp pre {padding:0 4ex}
.mp pre code {font-weight:normal;color:#434241}
.mp h2+pre,h3+pre {padding-left:0}
ol.man-decor,ol.man-decor li {margin:3px 0 10px 0;padding:0;float:left;width:33%;list-style-type:none;text-transform:uppercase;color:#999;letter-spacing:1px}
ol.man-decor {width:100%}
ol.man-decor li.tl {text-align:left}
ol.man-decor li.tc {text-align:center;letter-spacing:4px}
ol.man-decor li.tr {text-align:right;float:right}
</style>
</head>
<!--
The following styles are deprecated and will be removed at some point:
div#man, div#man ol.man, div#man ol.head, div#man ol.man.
The .man-page, .man-decor, .man-head, .man-foot, .man-title, and
.man-navigation should be used instead.
-->
<body id='manpage'>
<div class='mp' id='man'>
<div class='man-navigation' style='display:none'>
<a href="#Usage">Usage</a>
<a href="#Installation">Installation</a>
<a href="#Input-format">Input format</a>
</div>
<ol class='man-decor man-head man head'>
<li class='tl'>maildir-put</li>
<li class='tc'></li>
<li class='tr'>maildir-put</li>
</ol>
<h1>maildir-put</h1>
<p><code>maildir-put</code> is a tool to put messages in a predefined JSON format
inside a maildir. It also tries to detect duplicates and drops them.</p>
<h2 id="Usage">Usage</h2>
<pre><code>message-producer | maildir-put [arguments]
</code></pre>
<p>Available arguments:</p>
<ul>
<li><em>-cache</em>: path to a cache file used to store message IDs for duplicate
detection</li>
<li><em>-root</em>: path to the root maildir directory. Defaults to ~/Maildir.</li>
<li><em>-folder</em>: maildir folder to put messages. Defaults to "", the inbox.
The folder separator is "/".</li>
</ul>
<h2 id="Installation">Installation</h2>
<pre><code>go build &amp;&amp; cp maildir-put /usr/local/bin
</code></pre>
<h2 id="Input-format">Input format</h2>
<p>As its input, <code>maildir-put</code> takes a stream of JSON dictionaries (not a
list of dictionaries). Each dictionary represents a message. Available
keys are:</p>
<ul>
<li><em>body</em>: the body of the message, in HTML. Mandatory.</li>
<li><em>title</em>: the subject of the message, in text. Mandatory.</li>
<li><em>date</em>: the date of the message. Optional, defaults to current time. If
provided, must be RFC 2822 compliant.</li>
<li><em>author</em>: the name of the author, in text. Optional.</li>
<li><em>authorEmail</em>: the email address of the author. Optional.</li>
<li><em>id</em>: a unique identifier for the message. It will be used for the
creation of the Message-Id header, as well as in duplicate detection. It
should include three parts: a unique identifier for the application
(for example: <code>rss2json</code>), a unique identifier for the parameters
(for example: the feed URL) and a unique identifier for the message
(for example: an article ID). The identifier for the parameters may be
omitted if you provide a <em>host</em> key and the host is sufficient to
identify the parameters. Mandatory for threaded discussion handling and
duplicate detection, optional otherwise.</li>
<li><em>host</em>: the domain name of the producer of the message (in general,
the hostname of the server from which you fetched the information). Used
in the construction of the <code>Message-Id</code> and <code>References</code> headers, as well as in
duplicate detection. Optional, but strongly encouraged for threaded
discussion handling and duplicate detection.</li>
<li><em>references</em>: for threaded discussions, <em>id</em> of the parent messages. Note
that <em>host</em> must match in the two messages.</li>
<li><em>url</em>: URL of the message. Used by <code>ua-inline</code> to resolve relative
references.</li>
</ul>
<p>All strings must be encoded in UTF-8.</p>
<ol class='man-decor man-foot man foot'>
<li class='tl'></li>
<li class='tc'>March 2014</li>
<li class='tr'>maildir-put</li>
</ol>
</div>
</body>
</html>

1
doc/maildir-put.md Symbolic link
View File

@ -0,0 +1 @@
../maildir-put/README.md

32
doc/rss2json Normal file
View File

@ -0,0 +1,32 @@
.\" generated with Ronn/v0.7.3
.\" http://github.com/rtomayko/ronn/tree/0.7.3
.
.TH "RSS2JSON" "" "March 2014" "" ""
\fBrss2json\fR is a simple tool intended to be used with \fBmaildir\-put\fR and \fBggs\fR\. It is used to convert any RSS or Atom feed into a stream of messages usable by \fBmaildir\-put\fR\.
.
.SH "Usage"
.
.nf
rss2json feed\-url
.
.fi
.
.SH "Dependencies"
.
.IP "\(bu" 4
libxml
.
.IP "\(bu" 4
Optional: python and feedparser for parsing of ill\-formed feeds
.
.IP "" 0
.
.SH "Installation"
.
.nf
go build && cp rss2json /usr/local/bin
.
.fi

97
doc/rss2json.html Normal file
View File

@ -0,0 +1,97 @@
<!DOCTYPE html>
<html>
<head>
<meta http-equiv='content-type' value='text/html;charset=utf8'>
<meta name='generator' value='Ronn/v0.7.3 (http://github.com/rtomayko/ronn/tree/0.7.3)'>
<title>rss2json</title>
<style type='text/css' media='all'>
/* style: man */
body#manpage {margin:0}
.mp {max-width:100ex;padding:0 9ex 1ex 4ex}
.mp p,.mp pre,.mp ul,.mp ol,.mp dl {margin:0 0 20px 0}
.mp h2 {margin:10px 0 0 0}
.mp > p,.mp > pre,.mp > ul,.mp > ol,.mp > dl {margin-left:8ex}
.mp h3 {margin:0 0 0 4ex}
.mp dt {margin:0;clear:left}
.mp dt.flush {float:left;width:8ex}
.mp dd {margin:0 0 0 9ex}
.mp h1,.mp h2,.mp h3,.mp h4 {clear:left}
.mp pre {margin-bottom:20px}
.mp pre+h2,.mp pre+h3 {margin-top:22px}
.mp h2+pre,.mp h3+pre {margin-top:5px}
.mp img {display:block;margin:auto}
.mp h1.man-title {display:none}
.mp,.mp code,.mp pre,.mp tt,.mp kbd,.mp samp,.mp h3,.mp h4 {font-family:monospace;font-size:14px;line-height:1.42857142857143}
.mp h2 {font-size:16px;line-height:1.25}
.mp h1 {font-size:20px;line-height:2}
.mp {text-align:justify;background:#fff}
.mp,.mp code,.mp pre,.mp pre code,.mp tt,.mp kbd,.mp samp {color:#131211}
.mp h1,.mp h2,.mp h3,.mp h4 {color:#030201}
.mp u {text-decoration:underline}
.mp code,.mp strong,.mp b {font-weight:bold;color:#131211}
.mp em,.mp var {font-style:italic;color:#232221;text-decoration:none}
.mp a,.mp a:link,.mp a:hover,.mp a code,.mp a pre,.mp a tt,.mp a kbd,.mp a samp {color:#0000ff}
.mp b.man-ref {font-weight:normal;color:#434241}
.mp pre {padding:0 4ex}
.mp pre code {font-weight:normal;color:#434241}
.mp h2+pre,h3+pre {padding-left:0}
ol.man-decor,ol.man-decor li {margin:3px 0 10px 0;padding:0;float:left;width:33%;list-style-type:none;text-transform:uppercase;color:#999;letter-spacing:1px}
ol.man-decor {width:100%}
ol.man-decor li.tl {text-align:left}
ol.man-decor li.tc {text-align:center;letter-spacing:4px}
ol.man-decor li.tr {text-align:right;float:right}
</style>
</head>
<!--
The following styles are deprecated and will be removed at some point:
div#man, div#man ol.man, div#man ol.head, div#man ol.man.
The .man-page, .man-decor, .man-head, .man-foot, .man-title, and
.man-navigation should be used instead.
-->
<body id='manpage'>
<div class='mp' id='man'>
<div class='man-navigation' style='display:none'>
<a href="#Usage">Usage</a>
<a href="#Dependencies">Dependencies</a>
<a href="#Installation">Installation</a>
</div>
<ol class='man-decor man-head man head'>
<li class='tl'>rss2json</li>
<li class='tc'></li>
<li class='tr'>rss2json</li>
</ol>
<h1>rss2json</h1>
<p><code>rss2json</code> is a simple tool intended to be used with <code>maildir-put</code> and <code>ggs</code>. It is used to convert any RSS or Atom feed into a stream of messages usable by <code>maildir-put</code>.</p>
<h2 id="Usage">Usage</h2>
<pre><code>rss2json feed-url
</code></pre>
<h2 id="Dependencies">Dependencies</h2>
<ul>
<li>libxml</li>
<li>Optional: python and feedparser for parsing of ill-formed feeds</li>
</ul>
<h2 id="Installation">Installation</h2>
<pre><code>go build &amp;&amp; cp rss2json /usr/local/bin
</code></pre>
<ol class='man-decor man-foot man foot'>
<li class='tl'></li>
<li class='tc'>March 2014</li>
<li class='tr'>rss2json</li>
</ol>
</div>
</body>
</html>

1
doc/rss2json.md Symbolic link
View File

@ -0,0 +1 @@
../rss2json/README.md

87
doc/ua Normal file
View File

@ -0,0 +1,87 @@
.\" generated with Ronn/v0.7.3
.\" http://github.com/rtomayko/ronn/tree/0.7.3
.
.TH "UA" "" "March 2014" "" ""
This is a set of tools to aggregate all your information into your maildir\. Each tool can be used separately ; you can find a more complete description in their respective folder\.
.
.IP "\(bu" 4
\fBggs\fR is a software which runs commands periodically
.
.IP "\(bu" 4
\fBmaildir\-put\fR reads a set of messages from its standard input and puts them in a maildir
.
.IP "\(bu" 4
\fBrss2json\fR transforms any RSS/Atom feed into a set of messages that \fBmaildir\-put\fR can process
.
.IP "\(bu" 4
You can write your own producers for maildir\-put ; an example for the mangareader \fIhttp://mangareader\.net\fR service is provided\.
.
.IP "\(bu" 4
You can also put filters, like \fBua\-inline\fR
.
.IP "" 0
.
.SH "Usage"
.
.nf
ggs [path\-to\-configuration\-file]
.
.fi
.
.SH "Dependencies"
.
.IP "\(bu" 4
Go
.
.IP "\(bu" 4
libxml
.
.IP "\(bu" 4
For additional scrappers: python 3, aiohttp and pyquery
.
.IP "" 0
.
.SH "Installation"
.
.nf
make && sudo make install
.
.fi
.
.SH "Configuration"
See the \fBggs\fR documentation for more information\. Here is a sample configuration file, which puts some feeds into \fBFun\fR and \fBGeek\fR folders, some new chapters notification from mangareader into \fBEntertainment\fR, and my Github personal feed into inbox:
.
.IP "" 4
.
.nf
default_timeout=30
rss() {
command 2000 "rss2json \e"$1\e" | ua\-inline | maildir\-put \-root $HOME/Maildir\-feeds \-folder \e"$2\e""
}
mangareader() {
command 2000 "mangareader2json http://mangareader\.net/$1 | "\e
"maildir\-put \-root $HOME/Maildir\-feeds \-folder Entertainment"
}
rss http://xkcd\.com/atom\.xml Fun
rss http://feeds\.feedburner\.com/smbc\-comics/PvLb Fun
rss http://syndication\.thedailywtf\.com/TheDailyWtf Fun
rss http://www\.reddit\.com/r/science/top/\.rss Geek
rss http://www\.phoronix\.com/rss\.php Geek
mangareader naruto
mangareader bleach
mangareader gantz
rss https://github\.com/sloonz\.private\.atom?token=HIDDEN ""
.
.fi
.
.IP "" 0

30
doc/ua-inline Normal file
View File

@ -0,0 +1,30 @@
.\" generated with Ronn/v0.7.3
.\" http://github.com/rtomayko/ronn/tree/0.7.3
.
.TH "UA\-INLINE" "" "March 2014" "" ""
.
.SH "NAME"
\fBua\-inline\fR \- Inline HTML resources
.
.P
This is a simple filter intended to be used before \fBmaildir\-put\fR\. It replaces external images inside the body of the message by their content (using \fBdata:\fR scheme)\.
.
.P
If the body contains relative references, it tries to resolve them using the \fBurl\fR key of the message\. If that is not possible, no inlining is done\.
.
.SH "Example usage, in <code>ggsrc</code>"
.
.nf
command 2000 "rss2json feed\-url | ua\-inline | maildir\-put"
.
.fi
.
.SH "Installation"
.
.nf
go build && cp ua\-inline /usr/local/bin
.
.fi

99
doc/ua-inline.html Normal file
View File

@ -0,0 +1,99 @@
<!DOCTYPE html>
<html>
<head>
<meta http-equiv='content-type' value='text/html;charset=utf8'>
<meta name='generator' value='Ronn/v0.7.3 (http://github.com/rtomayko/ronn/tree/0.7.3)'>
<title>Inline HTML resources</title>
<style type='text/css' media='all'>
/* style: man */
body#manpage {margin:0}
.mp {max-width:100ex;padding:0 9ex 1ex 4ex}
.mp p,.mp pre,.mp ul,.mp ol,.mp dl {margin:0 0 20px 0}
.mp h2 {margin:10px 0 0 0}
.mp > p,.mp > pre,.mp > ul,.mp > ol,.mp > dl {margin-left:8ex}
.mp h3 {margin:0 0 0 4ex}
.mp dt {margin:0;clear:left}
.mp dt.flush {float:left;width:8ex}
.mp dd {margin:0 0 0 9ex}
.mp h1,.mp h2,.mp h3,.mp h4 {clear:left}
.mp pre {margin-bottom:20px}
.mp pre+h2,.mp pre+h3 {margin-top:22px}
.mp h2+pre,.mp h3+pre {margin-top:5px}
.mp img {display:block;margin:auto}
.mp h1.man-title {display:none}
.mp,.mp code,.mp pre,.mp tt,.mp kbd,.mp samp,.mp h3,.mp h4 {font-family:monospace;font-size:14px;line-height:1.42857142857143}
.mp h2 {font-size:16px;line-height:1.25}
.mp h1 {font-size:20px;line-height:2}
.mp {text-align:justify;background:#fff}
.mp,.mp code,.mp pre,.mp pre code,.mp tt,.mp kbd,.mp samp {color:#131211}
.mp h1,.mp h2,.mp h3,.mp h4 {color:#030201}
.mp u {text-decoration:underline}
.mp code,.mp strong,.mp b {font-weight:bold;color:#131211}
.mp em,.mp var {font-style:italic;color:#232221;text-decoration:none}
.mp a,.mp a:link,.mp a:hover,.mp a code,.mp a pre,.mp a tt,.mp a kbd,.mp a samp {color:#0000ff}
.mp b.man-ref {font-weight:normal;color:#434241}
.mp pre {padding:0 4ex}
.mp pre code {font-weight:normal;color:#434241}
.mp h2+pre,h3+pre {padding-left:0}
ol.man-decor,ol.man-decor li {margin:3px 0 10px 0;padding:0;float:left;width:33%;list-style-type:none;text-transform:uppercase;color:#999;letter-spacing:1px}
ol.man-decor {width:100%}
ol.man-decor li.tl {text-align:left}
ol.man-decor li.tc {text-align:center;letter-spacing:4px}
ol.man-decor li.tr {text-align:right;float:right}
</style>
</head>
<!--
The following styles are deprecated and will be removed at some point:
div#man, div#man ol.man, div#man ol.head, div#man ol.man.
The .man-page, .man-decor, .man-head, .man-foot, .man-title, and
.man-navigation should be used instead.
-->
<body id='manpage'>
<div class='mp' id='man'>
<div class='man-navigation' style='display:none'>
<a href="#NAME">NAME</a>
<a href="#Example-usage-in-ggsrc">Example usage, in ggsrc</a>
<a href="#Installation">Installation</a>
</div>
<ol class='man-decor man-head man head'>
<li class='tl'>ua-inline</li>
<li class='tc'></li>
<li class='tr'>ua-inline</li>
</ol>
<h2 id="NAME">NAME</h2>
<p class="man-name">
<code>ua-inline</code> - <span class="man-whatis">Inline HTML resources</span>
</p>
<p>This is a simple filter intended to be used before <code>maildir-put</code>. It
replaces external images inside the body of the message by their content
(using <code>data:</code> scheme).</p>
<p>If the body contains relative references, it tries to resolve them using
the <code>url</code> key of the message. If that is not possible, no inlining
is done.</p>
<h2 id="Example-usage-in-ggsrc">Example usage, in <code>ggsrc</code></h2>
<pre><code>command 2000 "rss2json feed-url | ua-inline | maildir-put"
</code></pre>
<h2 id="Installation">Installation</h2>
<pre><code>go build &amp;&amp; cp ua-inline /usr/local/bin
</code></pre>
<ol class='man-decor man-foot man foot'>
<li class='tl'></li>
<li class='tc'>March 2014</li>
<li class='tr'>ua-inline</li>
</ol>
</div>
</body>
</html>

1
doc/ua-inline.md Symbolic link
View File

@ -0,0 +1 @@
../ua-inline/README.md

145
doc/ua.html Normal file
View File

@ -0,0 +1,145 @@
<!DOCTYPE html>
<html>
<head>
<meta http-equiv='content-type' content='text/html;charset=utf-8'>
<meta name='generator' content='Ronn/v0.7.3 (http://github.com/rtomayko/ronn/tree/0.7.3)'>
<title>The Universal Aggregator</title>
<style type='text/css' media='all'>
/* style: man */
body#manpage {margin:0}
.mp {max-width:100ex;padding:0 9ex 1ex 4ex}
.mp p,.mp pre,.mp ul,.mp ol,.mp dl {margin:0 0 20px 0}
.mp h2 {margin:10px 0 0 0}
.mp > p,.mp > pre,.mp > ul,.mp > ol,.mp > dl {margin-left:8ex}
.mp h3 {margin:0 0 0 4ex}
.mp dt {margin:0;clear:left}
.mp dt.flush {float:left;width:8ex}
.mp dd {margin:0 0 0 9ex}
.mp h1,.mp h2,.mp h3,.mp h4 {clear:left}
.mp pre {margin-bottom:20px}
.mp pre+h2,.mp pre+h3 {margin-top:22px}
.mp h2+pre,.mp h3+pre {margin-top:5px}
.mp img {display:block;margin:auto}
.mp h1.man-title {display:none}
.mp,.mp code,.mp pre,.mp tt,.mp kbd,.mp samp,.mp h3,.mp h4 {font-family:monospace;font-size:14px;line-height:1.42857142857143}
.mp h2 {font-size:16px;line-height:1.25}
.mp h1 {font-size:20px;line-height:2}
.mp {text-align:justify;background:#fff}
.mp,.mp code,.mp pre,.mp pre code,.mp tt,.mp kbd,.mp samp {color:#131211}
.mp h1,.mp h2,.mp h3,.mp h4 {color:#030201}
.mp u {text-decoration:underline}
.mp code,.mp strong,.mp b {font-weight:bold;color:#131211}
.mp em,.mp var {font-style:italic;color:#232221;text-decoration:none}
.mp a,.mp a:link,.mp a:hover,.mp a code,.mp a pre,.mp a tt,.mp a kbd,.mp a samp {color:#0000ff}
.mp b.man-ref {font-weight:normal;color:#434241}
.mp pre {padding:0 4ex}
.mp pre code {font-weight:normal;color:#434241}
.mp h2+pre,h3+pre {padding-left:0}
ol.man-decor,ol.man-decor li {margin:3px 0 10px 0;padding:0;float:left;width:33%;list-style-type:none;text-transform:uppercase;color:#999;letter-spacing:1px}
ol.man-decor {width:100%}
ol.man-decor li.tl {text-align:left}
ol.man-decor li.tc {text-align:center;letter-spacing:4px}
ol.man-decor li.tr {text-align:right;float:right}
</style>
</head>
<!--
The following styles are deprecated and will be removed at some point:
div#man, div#man ol.man, div#man ol.head, div#man ol.man.
The .man-page, .man-decor, .man-head, .man-foot, .man-title, and
.man-navigation should be used instead.
-->
<body id='manpage'>
<div class='mp' id='man'>
<div class='man-navigation' style='display:none'>
<a href="#Usage">Usage</a>
<a href="#Dependencies">Dependencies</a>
<a href="#Installation">Installation</a>
<a href="#Configuration">Configuration</a>
</div>
<ol class='man-decor man-head man head'>
<li class='tl'>ua</li>
<li class='tc'></li>
<li class='tr'>ua</li>
</ol>
<h1>The Universal Aggregator</h1>
<p>This is a set of tools to aggregate all your information into your
maildir. Each tool can be used separately; you can find a more complete
description of each one in its respective folder.</p>
<ul>
<li><code>ggs</code> is a software which runs commands periodically</li>
<li><code>maildir-put</code> reads a set of messages from its standard input and puts
them in a maildir</li>
<li><code>rss2json</code> transforms any RSS/Atom feed into a set of messages that
<code>maildir-put</code> can process</li>
<li>You can write your own producers for <code>maildir-put</code>; an example for the
<a href="http://mangareader.net">mangareader</a> service is provided.</li>
<li>You can also put filters, like <code>ua-inline</code></li>
</ul>
<h2 id="Usage">Usage</h2>
<pre><code>ggs [path-to-configuration-file]
</code></pre>
<h2 id="Dependencies">Dependencies</h2>
<ul>
<li>Go</li>
<li>libxml</li>
<li>For additional scrapers: python 3, aiohttp and pyquery</li>
</ul>
<h2 id="Installation">Installation</h2>
<pre><code>make &amp;&amp; sudo make install
</code></pre>
<h2 id="Configuration">Configuration</h2>
<p>See the <code>ggs</code> documentation for more information. Here is a sample
configuration file, which puts some feeds into the <code>Fun</code> and <code>Geek</code> folders,
new-chapter notifications from mangareader into <code>Entertainment</code>,
and my personal GitHub feed into the inbox:</p>
<pre><code>default_timeout=30
rss() {
command 2000 "rss2json \"$1\" | ua-inline | maildir-put -root $HOME/Maildir-feeds -folder \"$2\""
}
mangareader() {
command 2000 "mangareader2json http://mangareader.net/$1 | "\
"maildir-put -root $HOME/Maildir-feeds -folder Entertainment"
}
rss http://xkcd.com/atom.xml Fun
rss http://feeds.feedburner.com/smbc-comics/PvLb Fun
rss http://syndication.thedailywtf.com/TheDailyWtf Fun
rss http://www.reddit.com/r/science/top/.rss Geek
rss http://www.phoronix.com/rss.php Geek
mangareader naruto
mangareader bleach
mangareader gantz
rss https://github.com/sloonz.private.atom?token=HIDDEN ""
</code></pre>
<ol class='man-decor man-foot man foot'>
<li class='tl'></li>
<li class='tc'>March 2014</li>
<li class='tr'>ua</li>
</ol>
</div>
</body>
</html>

1
doc/ua.md Symbolic link
View File

@ -0,0 +1 @@
../README.md

50
ggs/README.md Normal file
View File

@ -0,0 +1,50 @@
# GGS
`GGS` (Grey Goo Spawner) is a simple software that runs jobs
periodically. It is similar to cron, but with some differences:
* Whereas `cron` launches jobs at specific times, `ggs` is mainly
interested in intervals. It will run all jobs at its startup and then
will re-run each job after a certain delay has passed.
* `ggs` has a system of `workers`, similar to many servers (like nginx
or Apache with MPM Workers), to limit resource contention between your
jobs.
* You can define a timeout for your jobs, too.
## Usage
`ggs [configuration file]`
If no configuration file is provided, `ggs` will use `~/.config/ggsrc`
by default.
## Installation
`go build ggs.go && cp ggs /usr/local/bin`
## Configuration
The configuration file is a shell script, so the same rules as `sh` apply.
You create a job with the `command` function, which takes two arguments:
the delay between launches, and the command to run. You can specify a
timeout (in seconds) by setting the `timeout` environment variable
(optional; default: 0, meaning no timeout).
timeout=30 command 300 "uptime | mail admin@example.com"
command 5 'ping -c 1 github.com || sudo halt -p'
You can also set the number of workers (maximum number of jobs that can
run simultaneously):
workers=5 #Warning: don't do "workers = 5", spaces matter here!
## Advanced configuration
The configuration file is just a shell script which produces a JSON
document which matches the structure of the `Config` structure. You can do
`exec my_script` to produce the same JSON with a script in your favorite
language. You can also use variables, functions, execute external
commands, and so on...

140
ggs/ggs.go Normal file
View File

@ -0,0 +1,140 @@
package main
import (
"bytes"
"encoding/json"
"fmt"
"log"
"os"
"os/exec"
"os/signal"
"syscall"
"time"
)
// Command describes one periodic job. It is decoded from the JSON document
// printed by the configuration script, so the field names double as JSON keys.
type Command struct {
// Delay is the number of seconds to wait, once a run has finished (or
// failed to start), before the job is queued again.
Delay int
// Timeout is the number of seconds after which a still-running job is
// killed. Values <= 0 disable the timeout.
Timeout int
// Command is the shell command line, executed with `sh -c`.
Command string
}
// Config is the top-level structure of the JSON document produced by the
// configuration script.
type Config struct {
// Workers is the number of worker goroutines started by main, i.e. the
// maximum number of jobs running at the same time.
Workers int
// Commands lists every job; all of them are queued once at startup.
Commands []*Command
}
// config holds the configuration filled in by readConfig.
var config Config
// ch is the queue of jobs waiting for a free worker. It is created in main
// with room for every configured command.
var ch chan *Command
// CONFIG_WRAPPER is the shell script fed to `sh` on stdin by readConfig.
// Its single %s verb is replaced by the path of the user configuration file,
// which is loaded with `source`. The script defines the defaults (workers,
// default_timeout) and the `command` function that the user file calls to
// register jobs; each call appends one JSON object to $commands. The final
// `echo` prints a JSON document matching the Config structure.
//
// NOTE(review): `source` is not a POSIX sh builtin (the portable spelling is
// `.`), and the script relies on perl being installed for JSON string
// escaping — confirm both against the shells this is meant to run under.
const CONFIG_WRAPPER = `
workers=5
default_timeout=0
commands=
jsonString() {
perl -pe 's/\\/\\\\/g;s/"/\\"/g'
}
command() {
if [ "$commands" != "" ] ; then
commands="$commands,"
fi
delay=$1;shift
timeout=${timeout:-$default_timeout}
commands=$commands'{"Delay":'$delay',"Command":"'$(echo $@|jsonString)'","Timeout":'$timeout'}'
timeout=
}
source %s
echo '{"Workers":'$workers',"Commands":['$commands']}'
`
// shellQuote wraps s in single quotes so that sh reads it as one literal
// word, whatever spaces or metacharacters it contains. Embedded single
// quotes are emitted as '\'' (close quote, escaped quote, reopen quote).
func shellQuote(s string) string {
	var buf bytes.Buffer
	buf.WriteByte('\'')
	for i := 0; i < len(s); i++ {
		if s[i] == '\'' {
			buf.WriteString(`'\''`)
			continue
		}
		buf.WriteByte(s[i])
	}
	buf.WriteByte('\'')
	return buf.String()
}

// readConfig evaluates the configuration file and stores the result in the
// package-level config variable.
//
// The file is the first command-line argument when one is given, and
// $HOME/.config/ggsrc otherwise. It is not parsed directly: CONFIG_WRAPPER is
// run through `sh`, loads the file, and prints a JSON document that is then
// decoded into config. Anything the script writes to stderr is forwarded to
// our own stderr.
//
// It returns the error from running the shell or from decoding its output.
func readConfig() error {
	cfgFile := os.ExpandEnv("$HOME/.config/ggsrc")
	if len(os.Args) > 1 {
		cfgFile = os.Args[1]
	}

	// The path is spliced into a shell script, so it has to be quoted:
	// otherwise a path containing spaces or shell metacharacters would be
	// split or interpreted by sh instead of being used as a file name.
	script := fmt.Sprintf(CONFIG_WRAPPER, shellQuote(cfgFile))

	sp := exec.Command("sh")
	sp.Stderr = os.Stderr
	sp.Stdin = bytes.NewBufferString(script)
	out, err := sp.Output()
	if err != nil {
		return err
	}
	return json.Unmarshal(out, &config)
}
// process runs cmd once through `sh -c`, with stdout and stderr connected to
// our own, and blocks until it exits. When cmd.Timeout is positive the
// process is killed if it is still running after that many seconds.
//
// Whatever the outcome (start failure, non-zero exit, timeout or success),
// cmd is pushed back onto ch after cmd.Delay seconds so that it runs again.
func process(cmd *Command) {
	// Registered first so that it covers every return path below.
	defer func() {
		time.AfterFunc(time.Duration(cmd.Delay)*time.Second, func() {
			ch <- cmd
		})
	}()

	log.Print(cmd.Command)
	sp := exec.Command("sh", "-c", cmd.Command)
	sp.Stdout = os.Stdout
	sp.Stderr = os.Stderr
	if err := sp.Start(); err != nil {
		log.Printf("%s failed: %s", cmd.Command, err.Error())
		return
	}

	if cmd.Timeout > 0 {
		// Kill is safe to call even if the process has just exited: it
		// then only returns an error, which is ignored on purpose. This
		// avoids reading sp.ProcessState from the timer goroutine while
		// Wait is writing it.
		timer := time.AfterFunc(time.Duration(cmd.Timeout)*time.Second, func() {
			sp.Process.Kill()
		})
		defer timer.Stop()
	}

	if err := sp.Wait(); err != nil {
		log.Printf("%s failed: %s", cmd.Command, err.Error())
	}
}
func worker() {
for {
process(<-ch)
}
}
// main loads the configuration, starts config.Workers worker goroutines,
// queues every configured command once, and then blocks until SIGINT is
// received. It exits with status 1 if the configuration cannot be read.
func main() {
	if err := readConfig(); err != nil {
		fmt.Fprintf(os.Stderr, "Error while reading configuration: %s\n", err)
		os.Exit(1)
	}

	// The queue can hold every command at once, so neither the initial
	// fill below nor the re-queueing timers ever block.
	ch = make(chan *Command, len(config.Commands))
	for i := 0; i < config.Workers; i++ {
		go worker()
	}
	for _, cmd := range config.Commands {
		ch <- cmd
	}

	// Wait for SIGINT. The channel must be buffered: signal.Notify does
	// not block when delivering, so a signal arriving before the receive
	// below is ready would otherwise be dropped.
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGINT)
	<-sigChan
}

23
ggsrc.example Normal file
View File

@ -0,0 +1,23 @@
# Example ggs configuration. This file is a shell script loaded by ggs; every
# call to `command DELAY CMD` registers one job.

# Timeout, in seconds, applied to every job that does not set its own.
default_timeout=30
# rss FEED_URL FOLDER: fetch a feed every 2000 seconds and store its entries
# in the given folder of ~/Maildir-feeds.
rss() {
command 2000 "rss2json \"$1\" | maildir-put -root $HOME/Maildir-feeds -folder \"$2\""
}
# mangareader NAME: poll http://mangareader.net/NAME every 2000 seconds and
# store the results in the Entertainment folder.
mangareader() {
command 2000 "mangareader2json http://mangareader.net/$1 | "\
"maildir-put -root $HOME/Maildir-feeds -folder Entertainment"
}
rss http://xkcd.com/atom.xml Fun
rss http://feeds.feedburner.com/smbc-comics/PvLb Fun
rss http://syndication.thedailywtf.com/TheDailyWtf Fun
rss http://www.reddit.com/r/science/top/.rss Geek
rss http://www.phoronix.com/rss.php Geek
mangareader naruto
mangareader bleach
mangareader gantz
# An empty folder name means the inbox.
rss https://github.com/sloonz.private.atom?token=HIDDEN ""

53
maildir-put/README.md Normal file
View File

@ -0,0 +1,53 @@
# maildir-put
`maildir-put` is a tool to put messages in a predefined JSON format
inside a maildir. It also tries to detect duplicates and drop them.
## Usage
message-producer | maildir-put [arguments]
Available arguments:
* *-cache*: path to a cache file used to store message IDs for duplicate
detection
* *-root*: path to the root maildir directory. Defaults to ~/Maildir.
* *-folder*: maildir folder to put messages. Defaults to "", the inbox.
The folder separator is "/".
## Installation
go build && cp maildir-put /usr/local/bin
## Input format
As its input, `maildir-put` takes a stream of JSON dictionaries (not a
list of dictionaries). Each dictionary represents a message. Available
keys are:
* *body*: the body of the message, in HTML. Mandatory.
* *title*: the subject of the message, in text. Mandatory.
* *date*: the date of the message. Optional, defaults to current time. If
provided, must be RFC 2822 compliant.
* *author*: the name of the author, in text. Optional.
* *authorEmail*: the email address of the author. Optional.
* *id*: a unique identifier for the message. It will be used for the
creation of the Message-Id header, as well as in duplicates detection. It
should include three parts: a unique identifier for the application
(for example: `rss2json`), a unique identifier for the parameters
(for example: the feed URL) and a unique identifier for the message
(for example: an article ID). The identifier for the parameters may be
omitted if you provide a *host* key and that the host is sufficient to
identify the parameters. Mandatory for threaded discussions handling and
duplicates detection, optional else.
* *host*: the domain name of the producer of the message (in general,
the hostname of the server from which you fetched the information). Used
in `Message-Id` and `References` headers construction, as well as in
duplicates detection. Optional, but strongly encouraged for threaded
discussions handling and duplicates detection.
* *references*: for threaded discussions, *id* of the parent messages. Note
that *host* must match in the two messages.
* *url*: URL of the message. Used by `ua-inline` to resolve relative
references.
All strings must be encoded in UTF-8.

83
maildir-put/cache.go Normal file
View File

@ -0,0 +1,83 @@
package main
import (
"io"
"syscall"
"os"
"bufio"
)
// Cache is a persistent set of string keys, stored on disk as one key per
// line. It is used to remember which messages have already been delivered.
type Cache struct {
	data    map[string]bool // keys known to be in the cache file
	newData map[string]bool // keys added by Set and not yet written by Dump
	path    string          // location of the cache file
}

// OpenCache loads the cache stored at path.
//
// A missing file is not an error: an empty cache bound to path is returned,
// and the file is created by the first Dump. Any other failure to open or
// read the file returns a nil cache and the error.
func OpenCache(path string) (c *Cache, err error) {
	c = &Cache{make(map[string]bool), make(map[string]bool), path}

	cacheFile, err := os.Open(path)
	if err != nil {
		if os.IsNotExist(err) {
			return c, nil
		}
		return nil, err
	}
	defer cacheFile.Close()

	reader := bufio.NewReader(cacheFile)
	for {
		key, rerr := reader.ReadString('\n')
		if rerr != nil && rerr != io.EOF {
			return nil, rerr
		}
		if key != "" {
			// Strip the line terminator only when there is one: the
			// last line of a file without a trailing newline must not
			// lose its final character.
			if key[len(key)-1] == '\n' {
				key = key[:len(key)-1]
			}
			c.data[key] = true
		}
		if rerr == io.EOF {
			return c, nil
		}
	}
}

// Set records key in the cache. The key is only kept in memory until Dump
// is called.
func (c *Cache) Set(key string) {
	c.newData[key] = true
}

// Get reports whether key is in the cache, whether it was loaded from disk
// or added with Set.
func (c *Cache) Get(key string) bool {
	if _, known := c.data[key]; known {
		return true
	}
	_, pending := c.newData[key]
	return pending
}

// Dump appends every key added with Set since the cache was opened (or last
// dumped) to the cache file, creating the file if needed. An exclusive
// advisory lock is held on the file while writing so that concurrent
// processes do not interleave their output.
//
// Once written, keys are no longer considered pending, so calling Dump again
// does not append them a second time.
func (c *Cache) Dump() error {
	cacheFile, err := os.OpenFile(c.path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0660)
	if err != nil {
		return err
	}
	// Closing the file also releases the lock taken below.
	defer cacheFile.Close()

	if err = syscall.Flock(int(cacheFile.Fd()), syscall.LOCK_EX); err != nil {
		return err
	}

	writer := bufio.NewWriter(cacheFile)
	for key := range c.newData {
		if _, err = writer.WriteString(key); err != nil {
			return err
		}
		if err = writer.WriteByte('\n'); err != nil {
			return err
		}
	}
	if err = writer.Flush(); err != nil {
		return err
	}

	// Everything pending is now on disk.
	for key := range c.newData {
		c.data[key] = true
		delete(c.newData, key)
	}
	return nil
}

147
maildir-put/maildir-put.go Normal file
View File

@ -0,0 +1,147 @@
package main
import (
"bytes"
"crypto/sha256"
"encoding/json"
"errors"
"flag"
"fmt"
"github.com/sloonz/go-maildir"
"github.com/sloonz/go-mime-message"
"github.com/sloonz/go-qprintable"
"io"
"log"
"os"
"strings"
"time"
)
// hostname is the local host name, set in main. It is used as the default
// Host of messages that do not provide one.
var hostname string
// cache remembers the Message-Ids of messages already processed; it is
// opened in main and written back once the input is exhausted.
var cache *Cache
// Message is one input record, decoded from a JSON object read on stdin.
type Message struct {
// Id identifies the message; when set it is used to build the Message-Id
// header and to detect duplicates.
Id string `json:"id"`
// Body is the message body. Mandatory.
Body string `json:"body"`
// Title becomes the Subject header. Mandatory.
Title string `json:"title"`
// Author is the display name used in the From header.
Author string `json:"author"`
// AuthorEmail is the address used in the From header; defaults to
// "noreply@" followed by Host.
AuthorEmail string `json:"authorEmail"`
// Date is copied verbatim into the Date header; defaults to the current
// time.
Date string `json:"date"`
// References lists the Ids of parent messages, used to build the
// References header.
References []string `json:"references"`
// Host is the domain part of generated Message-Ids; defaults to the local
// host name.
Host string `json:"host"`
}
// MessageId builds a Message-Id header value of the form
// "<HEX.maildir-put@host>", where HEX is the lowercase hexadecimal SHA-256
// digest of id.
func MessageId(id, host string) string {
	digest := sha256.Sum256([]byte(id))
	return fmt.Sprintf("<%x.maildir-put@%s>", digest[:], host)
}
// Process delivers m to the maildir md.
//
// Missing optional fields are filled in first: Host defaults to the local
// host name, AuthorEmail to "noreply@" + Host, and Date to the current time.
// When m.Id is set, the derived Message-Id is looked up in the cache and the
// message is silently skipped if it has been seen before.
//
// It returns an error if Body or Title is empty, or if the message cannot be
// written to the maildir. The Message-Id is recorded in the cache only after
// a successful write, so that a failed delivery is retried on the next run
// instead of being mistaken for a duplicate.
func (m *Message) Process(md *maildir.Maildir) error {
	if m.Body == "" || m.Title == "" {
		return errors.New("Missing mandatory field")
	}

	if m.Host == "" {
		m.Host = hostname
	}
	if m.AuthorEmail == "" {
		m.AuthorEmail = "noreply@" + m.Host
	}
	if m.Date == "" {
		m.Date = time.Now().UTC().Format(time.RFC1123Z)
	}

	var id string
	if m.Id != "" {
		id = MessageId(m.Id, m.Host)
		if cache.Get(id) {
			return nil
		}
	}

	mail := message.NewTextMessage(qprintable.UnixTextEncoding, bytes.NewBufferString(m.Body))
	mail.SetHeader("Date", m.Date)
	mail.SetHeader("Subject", message.EncodeWord(m.Title))
	mail.SetHeader("From", message.EncodeWord(m.Author)+" <"+m.AuthorEmail+">")
	mail.SetHeader("Content-Type", "text/html; charset=\"UTF-8\"")
	if id != "" {
		mail.SetHeader("Message-Id", id)
	}
	if len(m.References) > 0 {
		refs := make([]string, 0, len(m.References))
		for _, r := range m.References {
			refs = append(refs, MessageId(r, m.Host))
		}
		mail.SetHeader("References", strings.Join(refs, " "))
	}

	if _, err := md.CreateMail(mail); err != nil {
		return err
	}
	if id != "" {
		cache.Set(id)
	}
	return nil
}
// main reads a stream of JSON messages on stdin and delivers each of them to
// the maildir selected by the -root and -folder flags, skipping messages
// whose id is already in the cache file given by -cache.
//
// It exits with status 1 if the cache, the host name or the maildir cannot
// be obtained. Errors on individual messages are logged and do not stop the
// run; a failure to write the cache back is logged as a warning.
func main() {
	var rootDir, folder, cacheFile string
	var err error

	flag.StringVar(&rootDir, "root", os.ExpandEnv("$HOME/Maildir"), "path to maildir")
	flag.StringVar(&folder, "folder", "", "maildir folder name to put email (empty for inbox)")
	flag.StringVar(&cacheFile, "cache", os.ExpandEnv("$HOME/.cache/maildir-put.cache"),
		"path to store message-ids to drop duplicate messages")
	// With the default flag set, Parse prints the usage and exits by
	// itself on a bad command line.
	flag.Parse()

	if cache, err = OpenCache(cacheFile); err != nil {
		log.Printf("Can't open cache: %s", err.Error())
		os.Exit(1)
	}

	if hostname, err = os.Hostname(); err != nil {
		log.Printf("Can't get hostname: %s", err.Error())
		os.Exit(1)
	}

	md, err := maildir.New(rootDir, true)
	if err != nil {
		log.Printf("Can't open maildir: %s", err.Error())
		os.Exit(1)
	}

	// "a/b" selects folder b inside folder a; empty components are
	// ignored, so an empty -folder leaves md pointing at the inbox.
	for _, subfolder := range strings.Split(folder, "/") {
		if subfolder != "" {
			md, err = md.Child(subfolder, true)
			if err != nil {
				log.Printf("Can't open maildir: %s", err.Error())
				os.Exit(1)
			}
		}
	}

	dec := json.NewDecoder(os.Stdin)
	for {
		msg := new(Message)
		err = dec.Decode(msg)
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Printf("Cannot read input message: %s", err.Error())
			// A value of the wrong type only spoils the current
			// message. Any other error (syntax error, truncated
			// input, read failure) is returned again by every later
			// Decode call, so give up instead of looping forever.
			if _, recoverable := err.(*json.UnmarshalTypeError); !recoverable {
				break
			}
			continue
		}
		if err = msg.Process(md); err != nil {
			log.Printf("Cannot process input message: %s", err.Error())
		}
	}

	if err = cache.Dump(); err != nil {
		log.Printf("warning: can't dump cache: %s", err.Error())
	}
}

16
rss2json/README.md Normal file
View File

@ -0,0 +1,16 @@
# rss2json
`rss2json` is a simple tool intended to be used with `maildir-put` and `ggs`. It is used to convert any RSS or Atom feed into a stream of messages usable by `maildir-put`.
## Usage
rss2json feed-url
## Dependencies
* libxml
* Optional: python and feedparser for parsing of ill-formed feeds
## Installation
go build && cp rss2json /usr/local/bin

94
rss2json/rss2json.go Normal file
View File

@ -0,0 +1,94 @@
package main
import (
"encoding/json"
"fmt"
"github.com/sloonz/cfeedparser"
"net/url"
"os"
"regexp"
"strings"
"time"
)