rivet/rivet.sh

#!/bin/sh -e

# Copyright (c) 2021 Alessandro Mauri
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Abort on the first failing command, and make sure none of the option flags
# leaks in from the calling environment.
set -e
unset SKIP_FOOTER SKIP_HEADER SKIP_META SKIP_LIST \
	VERBOSE PRINT_HELP SKIP_SITEMAP SKIP_FEED

# usage [status]
# Print the synopsis and the option summary on stdout, then terminate the
# script. The exit status is [status] when given and 2 otherwise.
usage() {
	printf 'Usage: rivet [-hvelfsurt] [-p string] [-o destdir] [-m number] [-n string] [-d string] srcdir domain\n'
	printf '\t%s\n' \
		'-h: prints this message' \
		'-o [destdir]: specifies the output directory to be [destdir]' \
		'-p [string]: Rename the "Pages" section to [string]' \
		'-n [string]: Set the title of the atom feed' \
		'-d [string]: Set the description of the atom feed' \
		'-m [number]: Set the max number of elements in the atom feed,'
	# Continuation of the -m description, indented one level deeper
	printf '\t\t0 to include all files\n'
	printf '\t%s\n' \
		'-v: Makes the script verbose' \
		'-e: Do not prepend _header.html to .html files' \
		'-l: Do not generate "Pages" section in index.html' \
		'-f: Do not prepend _footer.html to .html files' \
		'-s: Do not generate sitemap.xml' \
		"-u: Makes all references to the url 'http' instead of 'https'" \
		'-r: Do not generate an atom feed' \
		'-t: Do not insert contents of _metadata.html'
	exit "${1:-2}"
}

# convert <file>
# Convert a Markdown file to HTML next to it and remove the Markdown source.
# Files whose name does not end in ".md" are left untouched.
# Sets the globals infile, outfile and tmpfile.
# Returns 0 on success or when there is nothing to do, 1 when the temporary
# copy or the lowdown run fails; in that case the source file is kept.
convert() {
	infile="$1"
	case "$infile" in
		*.md) ;;
		*) return 0 ;;
	esac
	outfile="${infile%md}html"
	tmpfile="$(mktemp)" || return 1
	# lowdown is run on a temporary copy of the source, as before
	if cp "$infile" "$tmpfile" &&
		lowdown --html-no-skiphtml --html-no-escapehtml -s -Thtml \
			-o "$outfile" "$tmpfile"; then
		rm -f "$tmpfile" "$infile"
	else
		# Do not leak the temporary file and do not delete the
		# source of a failed conversion
		rm -f "$tmpfile"
		return 1
	fi
	# TODO: convert links to .md to .html
}

# Check dependencies: lowdown performs the Markdown to HTML conversion
if ! command -v lowdown > /dev/null 2>&1; then
	echo "lowdown is not installed" >&2
	exit 1
fi

# Defaults, each of them can be overridden from the command line
destdir='dst'
prefix='https'
linksec='Pages'
blog_title='Atom feed'
blog_desc=''
blog_nmax='0'
while getopts 'o:vhelfsrup:n:d:m:t' c
do
	case "$c" in
		o) destdir=${OPTARG%%\/} ;;
		v) VERBOSE=true ;;
		h) PRINT_HELP=true ;;
		e) SKIP_HEADER=true ;;
		l) SKIP_LIST=true ;;
		f) SKIP_FOOTER=true ;;
		s) SKIP_SITEMAP=true ;;
		r) SKIP_FEED=true ;;
		u) prefix='http' ;;
		p) linksec="$OPTARG" ;;
		n) blog_title="$OPTARG" ;;
		d) blog_desc="$OPTARG" ;;
		m)
			# The value is later compared with -gt, so it has to be
			# a non-negative integer
			case "$OPTARG" in
				'' | *[!0-9]*)
					echo "Invalid number for -m: $OPTARG" >&2
					usage
					;;
			esac
			blog_nmax="$OPTARG"
			;;
		t) SKIP_META=true ;;
		# Unknown option or missing option argument; getopts has
		# already printed a diagnostic
		*) usage ;;
	esac
done
shift $((OPTIND - 1))
# Asking for help must work without the positional arguments
if [ "$PRINT_HELP" ]; then
	usage 0
fi
if ! [ "$1" ] || ! [ "$2" ]; then
	echo "Not enough arguments" >&2
	usage
fi

src="$1"
srcdir=${src%%\/}
unset src
headerfile=$srcdir/_header.html
footerfile=$srcdir/_footer.html
metafile=$srcdir/_metadata.html

# Help takes precedence over any validation of the source tree
if [ "$PRINT_HELP" ]; then
	usage 0
fi

# Validate the source directory before looking for files inside it
if ! [ -d "$srcdir" ]; then
	echo "Error: missing source directory" >&2
	usage
fi

# Check if index.md is present
if ! [ -e "$srcdir"/index.md ]; then
	echo "Missing index.md in $srcdir" >&2
	exit 1
fi

# The header, footer and metadata files are required only when they are
# going to be inserted into the pages
if ! [ "$SKIP_HEADER" ] && ! [ -e "$headerfile" ]; then
	echo "Missing _header.html in $srcdir" >&2
	exit 1
fi
if ! [ "$SKIP_FOOTER" ] && ! [ -e "$footerfile" ]; then
	echo "Missing _footer.html in $srcdir" >&2
	exit 1
fi
if ! [ "$SKIP_META" ] && ! [ -e "$metafile" ]; then
	echo "Missing _metadata.html in $srcdir" >&2
	exit 1
fi

# Remove junk from {header,footer} files: the wrapping tags are added back
# when the pages are assembled. Only the tags themselves are removed, text
# that shares a line with them is kept.
# NOTE: this edits the files inside srcdir in place.
if ! [ "$SKIP_HEADER" ]; then
	sed -i -e 's/<header[^>]*>//' -e 's/<\/header>//' "$headerfile"
fi
if ! [ "$SKIP_FOOTER" ]; then
	sed -i -e 's/<footer[^>]*>//' -e 's/<\/footer>//' "$footerfile"
fi

# Remove any junk from the domain eg. [https://]domain.com[/]
domain="$(printf '%s\n' "$2" | sed -e 's/^https*:\/\///' -e 's/\/$//')"

# Save the real url
url="$prefix"'://'"$domain"

# Scratch lists, created only once the input is known to be valid and
# removed on every exit path
objlist="$(mktemp)"
objdate="$(mktemp)"
trap 'rm -f "$objlist" "$objdate" ${linklist:+"$linklist"}' EXIT

if [ "$VERBOSE" ]; then
	set -x
fi

# The destination is wiped before every build: refuse to do that when it is
# empty, or when it is the source directory or one of its parents
if [ -z "$destdir" ]; then
	echo "Error: empty destination directory" >&2
	exit 1
fi
if [ -d "$destdir" ]; then
	src_abs="$(CDPATH= cd -- "$srcdir" && pwd -P)"
	dst_abs="$(CDPATH= cd -- "$destdir" && pwd -P)"
	case "$src_abs/" in
		"${dst_abs%/}"/*)
			echo "Error: the destination directory contains the source directory" >&2
			exit 1
			;;
	esac
	unset src_abs dst_abs
fi
rm -rf "$destdir"
mkdir -p "$destdir"
cp -r "$srcdir"/* "$destdir"
rm -f "$destdir"/_header.html "$destdir"/_footer.html "$destdir"/_metadata.html

# Generate a file list ordered by modification time, newest first (ls -t).
# For every .md/.html source whose name does not start with '_':
#   objdate gets the modification time as printed by stat,
#   objlist gets the path of the copy of the file inside destdir.
# The exact "$srcdir/" prefix is swapped for "$destdir/", so that source
# directories made of several path components are handled too.
: > "$objlist"
: > "$objdate"
find "$srcdir" -type f -regex '.*\/[^_].+\..*' -exec ls -1t {} + |
	awk '/.*\.(md|html)$/' |
	while IFS="" read -r file; do
		stat -c '%y' "$file" >> "$objdate"
		printf '%s/%s\n' "$destdir" "${file#"$srcdir"/}" >> "$objlist"
	done

# Convert convertible files
while IFS="" read -r file; do
	convert "$file"
done < "$objlist"

# Convert the file list to a list that contains the path of all the html files
sed -i -e 's/\.md$/\.html/' "$objlist"

# Create a list that contains the links to all html files: the same paths
# with the leading destdir removed. The prefix is removed literally, so a
# destdir that contains '/' or regex metacharacters is handled correctly.
linklist="$(mktemp)"
while IFS="" read -r file; do
	printf '%s\n' "${file#"$destdir"}"
done < "$objlist" > "$linklist"

# Unless disabled, append the contents of the metadata file right after the
# <head> line of every html page in the destination
if [ -z "$SKIP_META" ]; then
	find "$destdir" -name "*.html" | while IFS="" read -r page; do
		sed -i "/<head>/r $metafile" "$page"
	done
fi

# Prepare index file list: a "map" <div> with one dated link per page,
# inserted right before </body> in index.html
if ! [ "$SKIP_LIST" ]; then
	# The file created by mktemp is written directly instead of being
	# removed and created again under the same name
	tmpfile="$(mktemp)"
	{
		printf '<div id="map">\n'
		printf '<h2 id="Pages">%s</h2>\n' "$linksec"
		count='0'
		while IFS="" read -r line; do
			# count is the line number shared by linklist, objlist
			# and objdate, so it advances for skipped entries too
			count=$((count + 1))
			case "$line" in
				*index.html*) continue ;;
			esac
			tfile="$(awk -v n="$count" 'NR == n' "$objlist")"
			title="$(sed -E -n 's/.*<title>\s*(.+)\s*<\/title>.*/\1/p' "$tfile")"
			# Fall back to the file name for pages without <title>
			if [ -z "$title" ]; then
				title="${tfile##*/}"
			fi
			# Keep only the date part of the timestamp
			ldate="$(awk -v n="$count" 'NR == n' "$objdate" | sed 's/\s.*$//')"
			printf '<p><a href="%s">%s - %s</a></p>\n' "$line" "$ldate" "$title"
		done < "$linklist"
		echo '</div>'
	} > "$tmpfile"
	# 'r' appends after the matching line, hence the placeholder line put
	# in front of </body> and blanked once the list has been read in
	sed -i '/<\/body>/i REPLACE' "$destdir"/index.html
	sed -i "/^REPLACE/r $tmpfile" "$destdir"/index.html
	sed -i 's/^REPLACE//' "$destdir"/index.html
	rm -f "$tmpfile"
fi

# Wrap the body of every page in <article></article>: the opening tag goes
# after the <body> line, the closing tag before the </body> line
find "$destdir" -name "*.html" | while IFS="" read -r page; do
	sed -i \
		-e '/<body>/a <article>' \
		-e '/<\/body>/i </article>' \
		"$page"
done

# Prepare the header: the contents of the header file wrapped in
# <header></header>, inserted after the <body> line of every page
if ! [ "$SKIP_HEADER" ]; then
	tmphead="$(mktemp)"
	# The opening tag is written unconditionally, so that an empty header
	# file still gives a balanced <header></header> pair
	{
		printf '<header>'
		cat "$headerfile"
		echo '</header>'
	} > "$tmphead"
	find "$destdir" -name "*.html" |
	while IFS="" read -r file; do
		sed -i "/<body>/r $tmphead" "$file"
	done
	rm -f "$tmphead"
fi

# Prepare the footer: the contents of the footer file wrapped in
# <footer></footer>, inserted before the </body> line of every page
if ! [ "$SKIP_FOOTER" ]; then
	tmpfoot="$(mktemp)"
	# The opening tag is written unconditionally, so that an empty footer
	# file still gives a balanced <footer></footer> pair
	{
		printf '<footer>'
		cat "$footerfile"
		echo '</footer>'
	} > "$tmpfoot"
	find "$destdir" -name "*.html" |
	while IFS="" read -r file; do
		# 'r' appends after the matching line, hence the placeholder
		# line put in front of </body> and blanked afterwards
		sed -i '/<\/body>/i REPLACE' "$file"
		sed -i "/^REPLACE/r $tmpfoot" "$file"
		sed -i 's/^REPLACE//' "$file"
	done
	rm -f "$tmpfoot"
fi

# Generate sitemap: one <url> entry per page, in link list order.
# <loc> holds the full address (scheme and domain included) and the file is
# written from scratch, so a sitemap.xml copied over from the source
# directory is replaced instead of being appended to.
# NOTE: paths are written as they are, without XML escaping.
if ! [ "$SKIP_SITEMAP" ]; then
	{
		printf '<?xml version="1.0" encoding="UTF-8"?>\n'
		printf '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
		while IFS="" read -r line; do
			printf '\t<url><loc>%s%s</loc></url>\n' "$url" "$line"
		done < "$linklist"
		printf '</urlset>\n'
	} > "$destdir"/sitemap.xml
fi

# Generate atom feed: one <entry> per page in link list order, limited to
# the first blog_nmax pages unless blog_nmax is 0.
# The file is written from scratch, entry links and ids are full addresses
# and the dates are converted to the ISO 8601 form printed by date -Is.
if ! [ "$SKIP_FEED" ]; then
	feed="$destdir"/atom.xml
	# The limit is compared with -gt below and must be a number
	case "$blog_nmax" in
		'' | *[!0-9]*)
			echo "Invalid feed size '$blog_nmax', including all files" >&2
			blog_nmax='0'
			;;
	esac
	{
		printf '<?xml version="1.0" encoding="utf-8"?>\n'
		printf '<feed xmlns="http://www.w3.org/2005/Atom">\n'
		printf '\t<title>%s</title>\n' "$blog_title"
		printf '\t<subtitle>%s</subtitle>\n' "$blog_desc"
		printf '\t<link href="%s/atom.xml" rel="self" />\n' "$url"
		printf '\t<link href="%s/" />\n' "$url"
		printf '\t<id>%s</id>\n' "$url"
		printf '\t<updated>%s</updated>\n' "$(date -Is)"
		count='0'
		while IFS="" read -r line; do
			# count is the line number shared by linklist, objlist
			# and objdate
			count=$((count + 1))
			if [ "$blog_nmax" -gt '0' ] && [ "$count" -gt "$blog_nmax" ]; then
				break
			fi
			tfile="$(awk -v n="$count" 'NR == n' "$objlist")"
			title="$(sed -E -n 's/.*<title>\s*(.+)\s*<\/title>.*/\1/p' "$tfile")"
			# Fall back to the file name for pages without <title>
			if [ -z "$title" ]; then
				title="${tfile##*/}"
			fi
			ldate="$(awk -v n="$count" 'NR == n' "$objdate")"
			# Reformat the timestamp recorded by stat
			ldate="$(date -Is -d "$ldate")"
			printf '\t<entry>\n'
			printf '\t\t<title>%s</title>\n' "$title"
			printf '\t\t<link href="%s%s" />\n' "$url" "$line"
			printf '\t\t<id>%s%s</id>\n' "$url" "$line"
			printf '\t\t<updated>%s</updated>\n' "$ldate"
			printf '\t</entry>\n'
		done < "$linklist"
		printf '</feed>\n'
	} > "$feed"
fi

# Drop the scratch lists and report success
for scratch in "$objlist" "$linklist" "$objdate"; do
	rm -f "$scratch"
done

exit 0