new maildir rendering (moved from jaromail)

This commit is contained in:
Jaromil 2017-04-30 15:03:30 +02:00
parent 73029ef7c8
commit 92239bf018
2 changed files with 579 additions and 7 deletions

508
maildir Executable file
View File

@ -0,0 +1,508 @@
#!/usr/bin/env zsh
#
# WebNomad, your slick and static website publisher
# (this 'maildir' code is taken from jaromail)
#
# Copyright (C) 2010-2017 Denis Roio <jaromil@dyne.org>
#
# This source code is free software; you can redistribute it and/or
# modify it under the terms of the GNU Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This source code is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# Please refer to the GNU Public License for more details.
#
# You should have received a copy of the GNU Public License along with
# this source code; if not, write to:
# Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# Tell whether a path looks like a maildir.
# Arguments: $1 - path to test
# Returns:   0 when both $1 and $1/cur are readable, 1 in all other cases
maildircheck() {
    fn maildircheck $*

    if [[ ! -r "$1" ]]; then
        func "Maildir not existing: $1"
        return 1
    fi

    # Only cur/ is probed, to keep the check fast; new/ and tmp/ are
    # deliberately not tested.
    if [[ ! -r "$1/cur" ]]; then
        error "Not a maildir: $1"
        return 1
    fi

    # Yes, it is a maildir
    return 0
}
# Print only the header section of a mail file: every line up to and
# including the first empty line.
# Arguments: $1 - path of the mail file
# Returns:   1 when the file is not readable, otherwise awk's status
hdr() {
    if [[ ! -r "$1" ]]; then
        error "hdr() called on non existing file: $1"
        return 1
    fi

    awk '
{ print }
/^$/ { exit }
' "$1"
}
# Create the sqlite database keeping up-to-date information on which
# emails inside the maildir are already published.
# Arguments: $1 - path of the database file to create
# Globals:   _uid, _gid (read) - owner applied to the new file
#            NOTE(review): neither is assigned in this file; presumably
#            provided by the caller - confirm.
# Returns:   0 once created, 1 when the file already exists or sqlite3 fails
maildb_create() {
    # register under this function's own name so traces are not misleading
    fn maildb_create $*

    [[ -r "$1" ]] && {
        warning "Maildir database already exists: $1"
        return 1
    }

    sqlite3 -batch "$1" <<EOF
CREATE TABLE published
(
uid text collate nocase unique,
file text collate nocase,
path text collate nocase,
mail_subj text,
mail_from text,
mail_date timestamp,
insert_date timestamp
);
EOF
    [[ $? != 0 ]] && {
        error "Error creating maildir database: $1"
        return 1
    }

    # make sure is private
    chmod 600 "$1"
    chown "${_uid}:${_gid}" "$1"

    act "Maildir database created: $1"
    return 0
}
# Search the published table for a uid and print the stored file name.
# Arguments: $1 - sqlite database file
#            $2 - uid string to search
# Outputs:   the "file" column of every matching row (sqlite3 column mode);
#            nothing when the uid is unknown
# Returns:   1 when ckreq rejects the arguments, otherwise sqlite3's status
maildb_lookup_uid() {
    fn maildb_lookup_uid $*
    _db="$1"
    _id="$2"
    req=(_db _id)
    ckreq || return 1

    # (Q) strips one level of quoting from the uid before it is embedded
    sqlite3 -column -batch "$1" <<EOF
SELECT file FROM published
WHERE uid IS "${(Q)2}";
EOF
}
# Record a newly found mail in the published table.
# Arguments: $1 - sqlite database file
#            $2 - path of the mail file
# Globals:   mail_id, mail_from, mail_subj, mail_date, mail_insert_date
#            (read) - values stored alongside the file name and path
# Returns:   0 on success, 1 when ckreq rejects the arguments or the
#            INSERT fails (the uid column is unique, so a duplicate fails)
maildb_insert() {
    fn maildb_insert $*
    _db="$1"
    _path="$2"
    _file=$(basename "$2")
    req=(_db _path _file)
    freq=("$_db" "$_path")
    ckreq || return 1

    # TODO:
    # _time=`${WORKDIR}/bin/fetchdate "$1" "%Y-%m-%d-%H-%M-%S"`

    func "new mail in database: $_file"

    # The values originate from mail headers and file names, so they are
    # written as single-quoted SQL literals with every embedded single
    # quote doubled: a quote character inside a subject or a path can then
    # neither break the statement nor extend it.
    local q s_id s_file s_path s_from s_subj s_date s_insert
    q="'"
    s_id=${mail_id//$q/$q$q}
    s_file=${_file//$q/$q$q}
    s_path=${_path//$q/$q$q}
    s_from=${mail_from//$q/$q$q}
    s_subj=${mail_subj//$q/$q$q}
    s_date=${mail_date//$q/$q$q}
    s_insert=${mail_insert_date//$q/$q$q}

    sqlite3 -batch "$_db" <<EOF
INSERT INTO published (uid, file, path, mail_from, mail_subj,
mail_date, insert_date)
VALUES ('${s_id}', '${s_file}', '${s_path}',
'${s_from}', '${s_subj}',
'${s_date}', '${s_insert}');
EOF
    [[ $? = 0 ]] || {
        error "mail_insert: duplicate found in $_db"
        return 1
    }
    return 0
}
# Dump the published table, most recently inserted entry first.
# Arguments: $1 - sqlite database file
# Outputs:   one line per row with the fields
#            uid|path|mail_from|mail_subj|mail_date|insert_date
# Returns:   1 when ckreq rejects the arguments, otherwise sqlite3's status
maildb_list() {
    fn maildb_list $*
    _db="$1"
    req=(_db)
    freq=($_db)
    ckreq || return 1

    sqlite3 -batch "$_db" <<EOF
.width 64 128
.separator "|"
SELECT uid, path, mail_from, mail_subj, mail_date, insert_date FROM published
ORDER BY insert_date DESC;
EOF
}
# Takes one entry (one output line) from maildb_list and fills in the
# mail_* globals from its '|' separated fields.
# Arguments: $1 - a line as printed by maildb_list
# Globals:   mail_id, mail_file, mail_from, mail_subj, mail_date,
#            mail_insert_date (written)
# Returns:   1 when ckreq rejects the argument
# NOTE(review): a '|' inside a stored subject or sender would shift the
# fields that follow it - confirm whether that can happen with real data.
mail_fill() {
fn mail_fill $*
_e="$1"
req=(_e)
ckreq || return 1
# [(ws:|:)N] selects the N-th word of the string, with '|' as the word
# separator; the order matches the SELECT in maildb_list
mail_id=${_e[(ws:|:)1]}
# second column of maildb_list is the stored path of the mail file
mail_file=${_e[(ws:|:)2]}
mail_from=${_e[(ws:|:)3]}
mail_subj=${_e[(ws:|:)4]}
mail_date=${_e[(ws:|:)5]}
mail_insert_date=${_e[(ws:|:)6]}
}
# Read the headers of one mail file and fill in the mail_* globals.
# Arguments: $1 - path of the mail file
# Globals:   mail_id (read)
#            mail_render, mail_from, mail_subj, mail_date,
#            mail_insert_date (written)
# Returns:   0, or 1 when ckreq rejects the argument
mail_getinfo() {
    fn mail_getinfo $*
    # TODO: path should be file only, find through all maildir
    _path="$1"
    req=(_path)
    freq=($_path)
    ckreq || return 1

    # extract header
    _hdr=$(hdr "$_path")

    # compute rendered filename: the mail id with any slash removed
    mail_render="${mail_id//\//}.html"
    func "RENDER: $mail_render"

    # Gather content headers.
    # print -r -- keeps backslashes and leading dashes in header text
    # literal. The patterns are anchored on the colon and stop at the first
    # match, so a repeated or similarly named header cannot be mixed in.
    # The sender is the text before the first '<', without double quotes;
    # it is HTML-escaped like the subject because both are later embedded
    # in HTML and Atom XML.
    mail_from=$(print -r -- "$_hdr" | cut -d'<' -f1 | awk '
/^From:/ { for(i=2;i<=NF;i++) printf "%s ", $i; exit }
' | sed 's/"//g' | escape_html)

    mail_subj=$(print -r -- "$_hdr" | awk '
/^Subject:/ { for(i=2;i<=NF;i++) printf "%s ", $i; exit }
' | escape_html)

    # compute date found in the email headers
    _date=$(print -r -- "$_hdr" | awk '
/^Date:/ { for(i=2;i<=NF;i++) printf "%s ", $i; exit }')

    # ATOM spec wants a T where date puts a space
    mail_date=$(date -d"$_date" --rfc-3339=seconds | sed 's/ /T/')
    func "DATE: $mail_date"

    mail_insert_date=$(date --rfc-3339=seconds | sed 's/ /T/')
    func "INSERTED: $mail_insert_date"

    return 0
}
# this one requires maildir-utils in apt
#
# Render one mail as an HTML fragment on stdout: a subject/sender heading
# followed by the first text/html part or, failing that, the first
# text/plain part converted through pandoc.
# Arguments: $1 - path of the mail file
# Globals:   mail_subj, mail_from, mail_date (read) - used for the heading
# Returns:   0 when a part was rendered, 1 when ckreq rejects the argument
#            or no renderable part is found
# Side effects: "mu extract" writes <part>.part into the current directory;
#            the file is removed again after use.
mail_file_render() {
    fn mail_file_render $*
    _path="$1"
    req=(_path)
    freq=($_path)
    ckreq || return 1

    cat <<EOF
<h2>${mail_subj}</h2>
<h4>From: ${mail_from} - ${mail_date}</h4>
EOF

    # check if it has already html
    _html=$(mu extract "$_path" | awk '/text\/html/ {print $1; exit}')
    [[ "$_html" = "" ]] || {
        mu extract --overwrite --parts="$_html" "$_path"
        # Check if there is an html envelope to weed out. The test uses the
        # same '<body' pattern as the awk filter below, so a body tag that
        # carries attributes is recognised too.
        if grep -q '<body' "$_html".part; then
            awk '
BEGIN { body=0 }
/<body/ { body=1; next }
/<\/body/ { body=0; next }
{ if(body==1) print $0 }' "$_html".part | iconv -c
        else
            iconv -c < "$_html".part
        fi
        rm -f "$_html".part
        return 0
    }

    # use the first text/plain, parse through markdown just in case
    _text=$(mu extract "$_path" | awk '/text\/plain/ {print $1; exit}')
    [[ "$_text" = "" ]] || {
        mu extract --overwrite --parts="$_text" "$_path"
        iconv -c < "$_text".part | escape_html |
            pandoc -f markdown_github -t html
        rm -f "$_text".part
        return 0
    }

    warning "nothing found to render in $_path"
    return 1

    # to make http text a link: (breaks if there are <a refs already)
    # s|www\.[^ |<]*|<a href="http://&">&</a>|g'
    # here we tweak the origin to avoid headers in markdown
    # preferring to interpret # as inline preformat
    # cat "$_text".part | sed '
    # s/^#/ /g
    # ' | iconv -c | maruku --html-frag 2>/dev/null | sed '
    # s|http://[^ |<]*|<a href="&">&</a>|g
    # s|https://[^ |<]*|<a href="&">&</a>|g
    # '
    # check if its an html only mail
    # _html=`mu extract "$_path" | awk '/text\/html/ {print $1; exit}'`
    # { test "$_html" = "" } || {
    # mu extract --overwrite --parts="$_html" "$_path"
    # elinks -dump "$_html".part
    # rm "$_html".part
    # return 0 }
}
# Write the HTML index of a maildir list to views/${mdname}.html: a table
# with one row per database entry, where sender and subject both link to
# the rendered message page.
# Globals:   mdsrc, mdname, WEB_ROOT (read)
#            mail_* (written through mail_fill)
# Returns:   1 when ckreq rejects the requirements
mail_index_render() {
    fn mail_index_render $*
    _db="${mdsrc}.db"
    req=(mdsrc mdname)
    freq=($mdsrc $_db)
    ckreq || return 1

    # table header: starts the file from scratch
    cat <<EOF > "views/${mdname}.html"
<table class="table table-hover table-condensed">
<thead><tr>
<th style="width:100px"><!-- from name --></th>
<th><!-- subject --></th>
</tr></thead>
EOF

    # one row per entry listed by maildb_list
    for m in ${(f)"$(maildb_list $_db)"}; do
        mail_fill "$m"
        cat <<EOF >> "views/${mdname}.html"
<tr>
<td style="vertical-align:middle;"><a href="${WEB_ROOT}/${mdname}.msg/${mail_id}.html">
${mail_from}</a></td>
<td style="vertical-align:middle;word-wrap:break-word">
<a href="${WEB_ROOT}/${mdname}.msg/${mail_id}.html">${mail_subj}</a>
</td>
</tr>
EOF
    done

    # close the table opened by the header, otherwise the markup that
    # follows this fragment ends up inside it
    cat <<EOF >> "views/${mdname}.html"
</table>
EOF
}
# Write the Atom feed of a maildir list to views/${mdname}.atom.xml: a
# feed header, one <entry> per database entry, then the closing tag.
# Globals:   mdsrc, mdname, WEB_ROOT, TITLE, DESCRIPTION (read)
#            eurl and mail_* (written while iterating)
# Returns:   1 when ckreq rejects the requirements
mail_atom_render() {
    fn mail_atom_render $*
    _db="${mdsrc}.db"
    req=(mdsrc mdname)
    freq=($mdsrc $_db)
    ckreq || return 1

    local feed
    feed="views/${mdname}.atom.xml"

    # feed header: starts the file from scratch
    cat <<EOF > "$feed"
<?xml version="1.0" encoding="utf-8" standalone="yes" ?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title type="text">${TITLE}</title>
<link rel="self" href="${WEB_ROOT}/${mdname}.atom.xml" />
<link href="${WEB_ROOT}/${mdname}.html" />
<id>${WEB_ROOT}/${mdname}.atom.xml</id>
<updated>$(date --rfc-3339=seconds | sed 's/ /T/')</updated>
<generator uri="http://www.dyne.org/software/webnomad/">WebNomad</generator>
<subtitle type="html">${DESCRIPTION}</subtitle>
<logo>http://dyne.org/dyne.png</logo>
EOF

    # write out one atom entry per database row
    for m in ${(f)"$(maildb_list $_db)"}; do
        mail_fill "$m"
        # address of the rendered message page, also used as entry id
        eurl="${WEB_ROOT}/${mdname}.msg/${mail_id}.html"
        cat <<EOF >> "$feed"
<entry>
<title type="html" xml:lang="en-US">${mail_subj}</title>
<link href="${eurl}" />
<id>${eurl}</id>
<updated>${mail_insert_date}</updated>
<content type="html" xml:lang="en-US">
${mail_subj}
</content>
<author>
<name>${mail_from}</name>
<uri>${eurl}</uri>
</author>
<source>
<title type="html">${mail_subj}</title>
<subtitle type="html">From: ${mail_from}</subtitle>
<updated>${mail_insert_date}</updated>
<link rel="self" href="${eurl}" />
<id>${eurl}</id>
</source>
</entry>
EOF
    done

    # close the feed
    cat <<EOF >> "$feed"
</feed>
EOF
}
# Iterate through the pubdb of a maildir and publish an Atom feed
# (views/atom.xml), an HTML index (views/index.html) and one page per
# message inside the maildir's own webnomad directory.
# Arguments: $1 - path of the maildir
# Globals:   FEED_LIMIT (read, defaulted to 30), WEB_ROOT, TITLE,
#            DESCRIPTION, WORKDIR (read)
# Returns:   1 when the argument is empty, is not a maildir, has no pub/
#            entry or has no .pubdb file
# NOTE(review): this function calls pubdb_getuid, pubdb_extract_body and
# pubdb_date, none of which is defined in this file - presumably left
# behind in jaromail; confirm they exist before relying on it.
pubdb_render_maildir() {
func "publish_render_maildir() $@"
md="$1"
[[ "$md" = "" ]] && {
error "Publish_render_maildir: not found: $md"
return 1
}
maildircheck "${md}" || {
error "Publish_render_maildir: not a maildir: $md"
return 1
}
# a readable pub/ entry is taken as the sign that webnomad was set up here
[[ -r "${md}/pub" ]] || {
error "Publish_render_maildir: webnomad not found in ${md}"
error "Initialize Webnomad inside the maildir that you want published."
return 1
}
views="${md}/views"
pubdb="${md}/.pubdb"
[[ -r "$pubdb" ]] || {
error "Publish_render_maildir: first run update_pubdb for $md"; return 1 }
# ${=mkdir} $TMPPREFIX/pubdb
# NOTE(review): maildb_list treats its argument as the sqlite database
# file, yet the maildir path is passed here rather than $pubdb - confirm.
mails=`maildb_list $md | head -n ${FEED_LIMIT:=30}`
# source webnomad's configurations
[[ -r "${md}/config.zsh" ]] && { source "${md}/config.zsh" }
# feed header: starts atom.xml from scratch
cat <<EOF > ${md}/views/atom.xml
<?xml version="1.0" encoding="utf-8" standalone="yes" ?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title type="text">${TITLE}</title>
<link rel="self" href="${WEB_ROOT}/atom.xml" />
<link href="${WEB_ROOT}" />
<id>${WEB_ROOT}/atom.xml</id>
<updated>`date --rfc-3339=seconds | sed 's/ /T/'`</updated>
<generator uri="http://www.dyne.org/software/jaro-mail/">JaroMail</generator>
<subtitle type="html">${DESCRIPTION}</subtitle>
<logo>http://dyne.org/dyne.png</logo>
EOF
# index header: starts index.html from scratch
cat <<EOF > "${md}/views/index.html"
<table class="table table-hover table-condensed">
<thead><tr>
<th style="width:100px"><!-- from name --></th>
<th><!-- subject --></th>
</tr></thead>
EOF
# main loop: one iteration per line listed in $mails
c=0
for m in ${(f)mails}; do
# entries starting with a dot are resolved against the current directory
[[ "$m[1]" = "." ]] && { _base=`pwd` }
# fill in uid and upath
pubdb_getuid "$_base/$m"
# but skip entries no more existing in maildir
[[ -r "$m" ]] || { continue }
# TODO: remove $m from database if not in maildir
_from=`hdr "$m" | ${WORKDIR}/bin/fetchaddr -x From -a`
# second comma-separated field of the fetchaddr output, unquoted
# (presumably the sender's display name - confirm against fetchaddr)
_fname=`print ${(Q)_from[(ws:,:)2]}` # | iconv -c`
func "From: ${_fname}"
_subject=`hdr "$m" | awk '
/^Subject:/ { for(i=2;i<=NF;i++) printf "%s ", $i; printf "\n" }
' | escape_html`
# fill in uid and upath
pubdb_getuid "$m"
# fill in the body
_body=`pubdb_extract_body $_base/$m`
# an empty body is reported, but the entry is still written out below
[[ "$_body" = "" ]] && { error "Error rendering $m" }
(( ++c ))
if (( $c < ${FEED_LIMIT:=30} )); then
# write out the atom entry
# NOTE(review): _daterss, used inside the entry, is never assigned in
# this function - confirm where it is meant to come from.
cat <<EOF >> ${md}/views/atom.xml
<entry>
<title type="html" xml:lang="en-US">$_subject</title>
<link href="${WEB_ROOT}/${upath}" />
<id>${WEB_ROOT}/${upath}</id>
<updated>`pubdb_date "$m"`</updated>
<content type="html" xml:lang="en-US">
`print ${(f)_body} | escape_html`
</content>
<author>
<name>${_fname}</name>
<uri>${WEB_ROOT}/${upath}</uri>
</author>
<source>
<title type="html">${_subject}</title>
<subtitle type="html">From: ${_fname}</subtitle>
<updated>${_daterss}</updated>
<link rel="self" href="${WEB_ROOT}/${upath}" />
<id>${WEB_ROOT}/${upath}</id>
</source>
</entry>
EOF
fi # FEED LIMIT not reached
#######
# now build an index and the sitemap
# if using webnomad write out also the message page
[[ -d "${md}/views" ]] && {
# NOTE(review): _datestring is logged here before it is assigned on the
# next line, so the message shows the value left by the previous
# iteration; it is also read from "$_path", which this function never
# sets (the current mail is "$m") - confirm and fix.
act "${_datestring} - ${_subject}"
_datestring=`hdr "$_path" | awk '/^Date/ { print $0 }'`
cat <<EOF > "${md}/views/${upath}"
<h2>${_subject}</h2>
<h4>From: ${_fname} - ${_datestring}</h4>
${_body}
EOF
# add entry in index
cat <<EOF >> "${md}/views/index.html"
<tr>
<td style="vertical-align:middle;"><a href="${WEB_ROOT}/${upath}">${_fname}</a></td>
<td style="vertical-align:middle;word-wrap:break-word">
<a href="${WEB_ROOT}/${upath}">${_subject}</a>
</td>
</tr>
EOF
}
done # loop is over
# close the feed and the index table
cat <<EOF >> "${md}/views/atom.xml"
</feed>
EOF
cat <<EOF >> "${md}/views/index.html"
</table>
EOF
notice "Archive website rendering completed"
}

78
render
View File

@ -24,6 +24,7 @@
SYS="`pwd`/webnomad"
helpers=1
source $SYS/zuper
vars+=(SYS DIR CMD dst)
@ -32,10 +33,13 @@ vars+=(TITLE DESCRIPTION KEYWORDS TYPE IMAGE EXTENSION)
vars+=(WEB_ROOT FILES_ROOT THUMB_SIZE BOOTSTRAP destination)
vars+=(FLOWTYPE FONT_RATIO EXIF_CLEAN EXIF_COMMENT EXIF_ROTATE)
vars+=(total_fonts cssfound)
arrs+=(includecss includejs fonts)
arrs+=(custom_fonts)
# used by maildir
vars+=(mail_id mail_render mail_date mail_insert_date mail_from mail_subj)
vars+=(mdsrc mdname)
source $SYS/zuper.init
@ -345,6 +349,11 @@ render_html() {
}
read_meta() {
# read metadata on each file which can change global settings
# format:
# # title put anything here
# # description put your description here
# # keywords list of keywords here
tmp=`head -n 3 | awk '
!/^#/ { next }
/title/ { printf "title=\""; for(i=3;i<=NF;i++) printf "%s ", $i; printf "\";" }
@ -405,7 +414,7 @@ mkdir -p "$destination/css"
mkdir -p "$destination/js"
mkdir -p "$destination/img"
[[ $BOOTSTRAP = 0 ]] || {
[[ "$BOOTSTRAP" = "" ]] || {
#{ test -r "$destination"/css/bootstrap.css } || {
cp "$SYS"/css/bootstrap.css "$destination"/css/
cp "$SYS"/img/* "$destination"/img/
@ -501,6 +510,62 @@ EOF
}
# render all maildirs
# Every views/*.maildir or views/*.maildirs file lists maildir paths, one
# per line. For each list: new mails are recorded in "<list file>.db",
# every entry not yet present under pub/ is rendered to
# views/<name>.msg/<uid>.html, then the index and the atom feed are written.
maildirs=($(find views -type f -name '*.maildir'))
maildirs+=($(find views -type f -name '*.maildirs'))
(( ${#maildirs} > 0 )) && {
    source $SYS/maildir
    for mdsrc in $maildirs; do
        act "Maildirs list: $mdsrc"

        # base directory where to render: list file name up to the first dot
        mdname=${"$(basename $mdsrc)"[(ws:.:)1]}
        act "rendering in views/${mdname}.msg"
        mkdir -p views/${mdname}.msg

        cat ${mdsrc} | read_meta

        for md in ${(f)"$(cat $mdsrc)"}; do
            act "Maildir rendering: $md"
            # lines that are not a maildir are skipped
            maildircheck "$md" || continue

            mdb="${mdsrc}.db"
            [[ -r "$mdb" ]] || maildb_create "$mdb"

            # Iterate through emails in maildir and insert new ones.
            # Only new/ and cur/ are scanned: tmp/ holds messages whose
            # delivery is still in progress, so publishing from it could
            # expose partial files. Collecting the paths in an array gives
            # a correct count of 0 for an empty maildir.
            mails=(${(f)"$(find "${md}/new" "${md}/cur" -type f)"})
            act "${#mails} total emails found"

            for m in $mails; do
                # the uid of a mail is the sha256 of its file
                mail_id=${"$(sha256sum "$m")"[(w)1]}
                u=$(maildb_lookup_uid "$mdb" "$mail_id")
                [[ "$u" = "" ]] && {
                    mail_getinfo "$m"
                    # $u is a new message
                    act "new message found: $mail_subj"
                    maildb_insert "$mdb" "$m"
                }
            done

            # iterate through database and render emails missing
            for m in ${(f)"$(maildb_list $mdb)"}; do
                mail_fill "$m"
                [[ -r pub/${mdname}.msg/${mail_id}.html ]] && {
                    func "skip already rendered: $mail_id"
                    continue
                }
                mail_file_render \
                    "$mail_file" \
                    > "views/${mdname}.msg/${mail_id}.html"
            done
        done

        # render indexes
        mail_index_render
        mail_atom_render
    done
}
# render all HTML views
htmls=(`find views -type f -name '*.html'`)
for src in $htmls; do
@ -516,10 +581,10 @@ done
# render all image galleries
source $SYS/gallery
gals=(`find views -type f -name '*.gal'`)
gals+=(`find views -type f -name '*.gallery'`)
if [[ "${#gals}" > 0 ]]; then
[[ "${#gals}" > 0 ]] && {
source $SYS/gallery
cp "$SYS"/js/*blueimp* "$destination"/js/
cp "$SYS"/css/*blueimp* "$destination"/css/
@ -532,10 +597,9 @@ if [[ "${#gals}" > 0 ]]; then
cat $src | render_gallery > $dst
[[ $QUIET = 1 ]] || print "done"
done
}
fi
# render all directory indexes
# render all directory indexes (TODO: properly fix recursion)
idxs=(`find views -type f -name '*.idx'`)
idxs+=(`find views -type f -name '*.index'`)
{ test ${#idxs} = 0 } || {