ua-inline: use attachment for images

rrix
Simon Lipp 2016-03-21 10:25:14 +01:00
parent be51bd77f6
commit 2ee32d8d81
2 changed files with 158 additions and 84 deletions

View File

@ -5,6 +5,7 @@ import (
"encoding/json"
"errors"
"flag"
"fmt"
"github.com/sloonz/go-maildir"
"github.com/sloonz/go-mime-message"
"github.com/sloonz/go-qprintable"
@ -18,25 +19,37 @@ import (
var hostname string
var cache Cache
type Message struct {
Id string `json:"id"`
Body string `json:"body"`
Title string `json:"title"`
Author string `json:"author"`
AuthorEmail string `json:"authorEmail"`
Date string `json:"date"`
References []string `json:"references"`
Host string `json:"host"`
type Attachment struct {
CID string `json:"cid"`
MimeType string `json:"mimeType"`
Data []byte `json:"data"`
Filename string `json:"filename"`
}
func isDotAtomText(s string) bool {
type Message struct {
Id string `json:"id"`
Body string `json:"body"`
Title string `json:"title"`
Author string `json:"author"`
AuthorEmail string `json:"authorEmail"`
Date string `json:"date"`
References []string `json:"references"`
Host string `json:"host"`
Attachments []Attachment `json:"attachments"`
}
func isAtomText(s string, allowDot bool) bool {
if s == "" {
return false
}
pointAllowed := false
for i := 0; i < len(s); i++ {
c := s[i]
// "." is allowed, but not in first position
// ".." is not allowed
if c == '.' && pointAllowed {
if c == '.' && pointAllowed && allowDot {
pointAllowed = false
continue
} else {
@ -65,55 +78,60 @@ func isDotAtomText(s string) bool {
return true
}
// allowDot=true is for no-fold-quote ; allowDot=fales is for quoted-string
func encNoFoldQuote(s string, buf *bytes.Buffer, allowDot bool) {
if isAtomText(s, allowDot) {
buf.WriteString(s)
} else {
// Encode left part as no-fold-quote
// ASCII 9 (\t), 32 (space), 34 (dquote), 92 (backslash) are escaped with a backslash
// Non-ASCII and ASCII 0, 10 (\n), 13 (\r) are dropped
// Other characters are transmitted as-is
buf.WriteByte('"')
for i := 0; i < len(s); i++ {
if s[i] == 0 || s[i] == '\r' || s[i] == '\n' || s[i] > 127 {
// Drop it
} else if s[i] == '\t' || s[i] == ' ' || s[i] == '"' || s[i] == '\\' {
buf.Write([]byte{'\\', s[i]})
} else {
buf.WriteByte(s[i])
}
}
buf.WriteByte('"')
}
}
func encNoFoldLiteral(s string, buf *bytes.Buffer) {
if isAtomText(s, true) {
buf.WriteString(s)
} else {
// Encode right part as no-fold-literal
// ASCII 9 (\t), 32 (space), 91 ([), 92 (backslash) and 93 (]) are escaped with a backslash
// Non-ASCII and ASCII 0, 10 (\n), 13 (\r) are dropped
// Other characters are transmitted as-is
buf.WriteByte('[')
for i := 0; i < len(s); i++ {
if s[i] == 0 || s[i] == '\r' || s[i] == '\n' || s[i] > 127 {
// Drop it
} else if s[i] == '\t' || s[i] == ' ' || s[i] == '[' || s[i] == '\\' || s[i] == ']' {
buf.Write([]byte{'\\', s[i]})
} else {
buf.WriteByte(s[i])
}
}
buf.WriteByte(']')
}
}
func MessageId(id, host string) string {
// According to RFC 2822:
// msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS]
// id-left = dot-atom-text / no-fold-quote
// id-right = dot-atom-text / no-fold-literal
idBuf := bytes.NewBufferString("<")
if isDotAtomText(id) {
idBuf.WriteString(id)
} else {
// Encode left part as no-fold-quote
// ASCII 9 (\t), 32 (space), 34 (dquote), 92 (backslash) are escaped with a backslash
// Non-ASCII and ASCII 0, 10 (\n), 13 (\r) are dropped
// Other characters are transmitted as-is
idBuf.WriteByte('"')
for i := 0; i < len(id); i++ {
if id[i] == 0 || id[i] == '\r' || id[i] == '\n' || id[i] > 127 {
// Drop it
} else if id[i] == '\t' || id[i] == ' ' || id[i] == '"' || id[i] == '\\' {
idBuf.Write([]byte{'\\', id[i]})
} else {
idBuf.WriteByte(id[i])
}
}
idBuf.WriteByte('"')
}
encNoFoldQuote(id, idBuf, true)
idBuf.WriteByte('@')
if isDotAtomText(host) {
idBuf.WriteString(host)
} else {
// Encode right part as no-fold-literal
// ASCII 9 (\t), 32 (space), 91 ([), 92 (backslash) and 93 (]) are escaped with a backslash
// Non-ASCII and ASCII 0, 10 (\n), 13 (\r) are dropped
// Other characters are transmitted as-is
idBuf.WriteByte('[')
for i := 0; i < len(host); i++ {
if host[i] == 0 || host[i] == '\r' || host[i] == '\n' || host[i] > 127 {
// Drop it
} else if host[i] == '\t' || host[i] == ' ' || host[i] == '[' || host[i] == '\\' || host[i] == ']' {
idBuf.Write([]byte{'\\', host[i]})
} else {
idBuf.WriteByte(host[i])
}
}
idBuf.WriteByte(']')
}
encNoFoldLiteral(host, idBuf)
idBuf.WriteByte('>')
return idBuf.String()
@ -121,6 +139,7 @@ func MessageId(id, host string) string {
func (m *Message) Process(md *maildir.Maildir) error {
var id string
var mail *message.Message
if m.Body == "" || m.Title == "" {
return errors.New("Missing mandatory field")
@ -145,16 +164,44 @@ func (m *Message) Process(md *maildir.Maildir) error {
}
}
mail := message.NewTextMessage(qprintable.UnixTextEncoding, bytes.NewBufferString(m.Body))
rootContentType := "text/html; charset=\"UTF-8\""
bodyPart := message.NewTextMessage(qprintable.UnixTextEncoding, bytes.NewBufferString(m.Body))
bodyPart.SetHeader("Content-Type", rootContentType)
if m.Attachments == nil {
mail = bodyPart
} else {
ctBuf := bytes.NewBufferString("")
encNoFoldQuote(rootContentType, ctBuf, false)
rootPart := message.NewMultipartMessageParams("related", "",
map[string]string{"type": ctBuf.String()})
rootPart.AddPart(bodyPart)
for _, attachment := range m.Attachments {
attPart := message.NewBinaryMessage(bytes.NewBuffer(attachment.Data))
attPart.SetHeader("Content-ID", fmt.Sprintf("<%s>", attachment.CID))
attPart.SetHeader("Content-Type", attachment.MimeType)
if attachment.Filename == "" {
attPart.SetHeader("Content-Disposition", "inline")
} else {
fnBuf := bytes.NewBufferString("")
encNoFoldQuote(attachment.Filename, fnBuf, false)
attPart.SetHeader("Content-Description", attachment.Filename)
attPart.SetHeader("Content-Disposition", fmt.Sprintf("inline; filename=%s", fnBuf.String()))
}
rootPart.AddPart(attPart)
}
mail = &rootPart.Message
}
// In a maildir, mails are expected to end with LF line endings. Most softwares are
// just fine with CRLF line endings, but some (for example Mutt) dont.
mail.EOL = "\n"
mail.SetHeader("Date", m.Date)
mail.SetHeader("Subject", message.EncodeWord(m.Title))
mail.SetHeader("From", message.EncodeWord(m.Author)+" <"+m.AuthorEmail+">")
mail.SetHeader("Content-Type", "text/html; charset=\"UTF-8\"")
if id != "" {
mail.SetHeader("Message-Id", id)
}

View File

@ -25,11 +25,19 @@ type Message map[string]interface{}
var CacheDir string
func fetch(resUrlString string, baseUrl *url.URL) string {
func hash(name string) string {
h := sha256.New()
h.Write([]byte(name))
return fmt.Sprintf("%x", h.Sum(nil))
}
func fetch(resUrlString string, baseUrl *url.URL) (data []byte, contentType string) {
var err error
// Resolve relative url
resUrl, _ := url.Parse(resUrlString)
if resUrl == nil || (baseUrl == nil && !resUrl.IsAbs()) {
return ""
return nil, ""
}
if !resUrl.IsAbs() {
@ -37,14 +45,19 @@ func fetch(resUrlString string, baseUrl *url.URL) string {
}
// Test cache
h := sha256.New()
h.Write([]byte(resUrl.String()))
cacheFile := fmt.Sprintf("%s/%x@%s", CacheDir, h.Sum(nil), resUrl.Host)
data, err := ioutil.ReadFile(cacheFile)
h := hash(resUrl.String())
dataCacheFile := fmt.Sprintf("%s/data-%x@%s", CacheDir, h, resUrl.Host)
typeCacheFile := fmt.Sprintf("%s/type-%x@%s", CacheDir, h, resUrl.Host)
data, err = ioutil.ReadFile(dataCacheFile)
if err == nil {
return string(data)
var bContentType []byte
bContentType, err = ioutil.ReadFile(typeCacheFile)
contentType = string(bContentType)
}
if err == nil {
return
} else if !os.IsNotExist(err) {
log.Printf("Can't read cache file %s: %s", cacheFile, err.Error())
log.Printf("Can't read cache file %s or %s: %s", dataCacheFile, typeCacheFile, err.Error())
}
// Cache miss
@ -55,34 +68,32 @@ func fetch(resUrlString string, baseUrl *url.URL) string {
} else {
log.Printf("Error downloading %s: %s", resUrl.String(), resp.Status)
}
return ""
return nil, ""
}
data, err = ioutil.ReadAll(resp.Body)
resp.Body.Close()
if err != nil {
log.Printf("Error downloading %s: %s", resUrl.String(), err.Error())
return ""
return nil, ""
}
// Transform to data: URI scheme
var mimetype string
// Get type
if _, ok := resp.Header["Content-Type"]; ok {
mimetype = resp.Header["Content-Type"][0]
contentType = resp.Header["Content-Type"][0]
} else {
mimetype = http.DetectContentType(data)
contentType = http.DetectContentType(data)
}
if strings.Contains(mimetype, ";") {
mimetype = mimetype[:strings.Index(mimetype, ";")]
}
data = []byte(fmt.Sprintf("data:%s;base64,%s", mimetype, base64.StdEncoding.EncodeToString(data)))
// Write to cache
if err = ioutil.WriteFile(cacheFile, data, os.FileMode(0644)); err != nil {
log.Printf("Can't write cache file %s: %s", cacheFile, err.Error())
if err = ioutil.WriteFile(dataCacheFile, data, os.FileMode(0644)); err != nil {
log.Printf("Can't write cache file %s: %s", dataCacheFile, err.Error())
}
if err = ioutil.WriteFile(typeCacheFile, []byte(contentType), os.FileMode(0644)); err != nil {
log.Printf("Can't write cache file %s: %s", typeCacheFile, err.Error())
}
return string(data)
return
}
func ProcessMessage(msg Message, ch chan Message) {
@ -104,15 +115,26 @@ func ProcessMessage(msg Message, ch chan Message) {
}
}
var attachments []map[string]string
attrRe := "\\s*[\"']?\\s*([^\\s\"'>]+)\\s*[\"']?"
// Inline <img>
// Inline <img> as attachment
body = regexp.MustCompile("<img[^>]+>").ReplaceAllStringFunc(body, func(img string) string {
src := regexp.MustCompile("src="+attrRe).FindStringSubmatch(img)
if len(src) > 1 && !strings.HasPrefix(src[1], "data:") {
data := fetch(html.UnescapeString(src[1]), msgUrl)
if data != "" {
return strings.Replace(img, src[0], "src=\""+data+"\"", 1)
cid := hash(src[1])
filename := regexp.MustCompile("/([^/?]+)(\\?|$)").FindStringSubmatch(src[1])
data, mimeType := fetch(html.UnescapeString(src[1]), msgUrl)
if data != nil {
attachment := map[string]string {
"cid": cid,
"mimeType": mimeType,
"data": base64.StdEncoding.EncodeToString(data)}
if filename != nil {
attachment["filename"] = filename[1]
}
attachments = append(attachments, attachment)
return strings.Replace(img, src[0], fmt.Sprintf("src=\"cid:%s\"", cid), 1)
}
}
return img
@ -122,9 +144,10 @@ func ProcessMessage(msg Message, ch chan Message) {
body = regexp.MustCompile("<style[^>]+>").ReplaceAllStringFunc(body, func(style string) string {
src := regexp.MustCompile("src="+attrRe).FindStringSubmatch(style)
if len(src) > 1 && !strings.HasPrefix(src[1], "data:") {
data := fetch(html.UnescapeString(src[1]), msgUrl)
if data != "" {
return strings.Replace(style, src[0], "src=\""+data+"\"", 1)
data, mimeType := fetch(html.UnescapeString(src[1]), msgUrl)
if data != nil {
newSrc := fmt.Sprintf("src=\"data:%s;base64,%s\"", mimeType, base64.StdEncoding.EncodeToString(data))
return strings.Replace(style, src[0], newSrc, 1)
}
}
return style
@ -132,6 +155,10 @@ func ProcessMessage(msg Message, ch chan Message) {
msg["body"] = body
if attachments != nil {
msg["attachments"] = attachments
}
ch <- msg
}