#!/bin/sh -e

# Copyright (c) 2021 Alessandro Mauri
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# rivet: build a static site from a directory of markdown/html pages.
#
# Every *.md page under srcdir is converted to HTML with lowdown, then each
# HTML page in destdir gets (unless disabled by a flag) the contents of
# _metadata.html after <head>, its body wrapped in <article>, and
# _header.html / _footer.html wrapped in <header>/<footer> around it.
# index.html additionally receives a list of links to the other pages, and
# sitemap.xml and atom.xml are written next to it.
#
# Relies on the GNU flavour of sed (-i, one-line a/i/r), date (-I, -r) and
# on mktemp, in addition to lowdown.

set -e

unset SKIP_FOOTER
unset SKIP_HEADER
unset SKIP_META
unset SKIP_LIST
unset VERBOSE
unset SKIP_SITEMAP
unset SKIP_FEED

# Print the command line help and exit.
# $1: exit status; defaults to 2 (usage error), -h passes 0.
usage() {
	printf "Usage: rivet [-hvelfsurt] [-p string] [-o destdir] [-m number] [-n string] [-d string] srcdir domain\n"
	printf "\t-h: prints this message\n"
	printf "\t-o [destdir]: specifies the output directory to be [destdir]\n"
	printf "\t-p [string]: Rename the \"Pages\" section to [string]\n"
	printf "\t-n [string]: Set the title of the atom feed\n"
	printf "\t-d [string]: Set the description of the atom feed\n"
	printf "\t-m [number]: Set the max number of elements in the atom feed, 0 to include all files\n"
	printf "\t-v: Makes the script verbose\n"
	printf "\t-e: Do not prepend _header.html to .html files\n"
	printf "\t-l: Do not generate \"Pages\" section in index.html\n"
	printf "\t-f: Do not prepend _footer.html to .html files\n"
	printf "\t-s: Do not generate sitemap.xml\n"
	printf "\t-u: Makes all references to the url 'http' instead of 'https'\n"
	printf "\t-r: Do not generate an atom feed\n"
	printf "\t-t: Do not insert contents of _metadata.html\n"
	exit "${1:-2}"
}

# Print the arguments on stderr and abort with status 1.
die() {
	printf '%s\n' "$*" >&2
	exit 1
}

# Print $1 with every trailing slash removed (the result may be empty).
trim_slashes() {
	trimmed="$1"
	while [ "$trimmed" != "${trimmed%/}" ]; do
		trimmed="${trimmed%/}"
	done
	printf '%s\n' "$trimmed"
}

# Convert the markdown file $1 to a standalone HTML file with the same name
# but a .html extension, then delete the markdown file. Files that do not
# end in .md are left untouched.
convert() {
	infile="$1"
	case "$infile" in
	*.md) ;;
	*) return 0 ;;
	esac
	outfile="${infile%md}html"
	# lowdown reads from a scratch copy rather than from the page itself
	tmpfile="$workdir/convert.md"
	cp "$infile" "$tmpfile"
	lowdown --html-no-skiphtml --html-no-escapehtml -s -Thtml \
		-o "$outfile" "$tmpfile"
	rm -f "$tmpfile" "$infile"
	# TODO: convert links to .md to .html
}

# Print the text of the <title> element of the HTML file $1, falling back
# to the file's basename when no title is found.
page_title() {
	title="$(sed -E -n 's/.*<title>\s*(.+)\s*<\/title>.*/\1/p' "$1")"
	if [ -z "$title" ]; then
		title="${1##*/}"
	fi
	printf '%s\n' "$title"
}

# Insert the contents of file $2 into file $1, right before the line that
# holds </body>. A marker line is placed first because sed's 'r' can only
# append after a line; the marker is blanked afterwards.
insert_before_body_end() {
	sed -i '/<\/body>/i @@RIVET_INSERT@@' "$1"
	sed -i "/^@@RIVET_INSERT@@\$/r $2" "$1"
	sed -i 's/^@@RIVET_INSERT@@$//' "$1"
}

# Write to file $3 the contents of file $2 wrapped in a <$1> element, after
# dropping any <$1 ...> / </$1> tags already present in $2. The source file
# itself is never modified.
wrap_fragment() {
	{
		printf '<%s>' "$1"
		sed -e "s/<$1[^>]*>//" -e "s/<\/$1>//" "$2"
		printf '</%s>\n' "$1"
	} > "$3"
}

# Remove the scratch directory; installed as the EXIT trap.
cleanup() {
	rm -rf -- "$workdir"
}

destdir='dst'
prefix='https'
linksec='Pages'
blog_title='Atom feed'
blog_desc=''
blog_nmax='0'

while getopts 'o:vhelfsrup:n:d:m:t' c; do
	case "$c" in
	o) destdir="$(trim_slashes "$OPTARG")" ;;
	v) VERBOSE=true ;;
	h) usage 0 ;;
	e) SKIP_HEADER=true ;;
	l) SKIP_LIST=true ;;
	f) SKIP_FOOTER=true ;;
	s) SKIP_SITEMAP=true ;;
	r) SKIP_FEED=true ;;
	u) prefix='http' ;;
	p) linksec="$OPTARG" ;;
	n) blog_title="$OPTARG" ;;
	d) blog_desc="$OPTARG" ;;
	m) blog_nmax="$OPTARG" ;;
	t) SKIP_META=true ;;
	# getopts has already reported the offending option
	*) usage ;;
	esac
done
shift $((OPTIND - 1))

if [ -z "$1" ] || [ -z "$2" ]; then
	echo "Not enough arguments" >&2
	usage
fi

# -m is compared numerically below, so refuse anything but digits
case "$blog_nmax" in
'' | *[!0-9]*)
	echo "Invalid number of feed elements: $blog_nmax" >&2
	usage
	;;
esac

if [ "$VERBOSE" ]; then
	set -x
fi

# Check dependencies
if ! command -v lowdown > /dev/null; then
	die "lowdown is not installed"
fi

srcdir="$(trim_slashes "$1")"
headerfile=$srcdir/_header.html
footerfile=$srcdir/_footer.html
metafile=$srcdir/_metadata.html

if ! [ -d "$srcdir" ]; then
	echo "Error: missing source directory" >&2
	usage
fi

# destdir is wiped below: never let it be empty or the source directory
if [ -z "$destdir" ] || [ "$destdir" = "$srcdir" ]; then
	die "Error: invalid output directory '$destdir'"
fi

# Check if index.md is present
if ! [ -e "$srcdir"/index.md ]; then
	die "Missing index.md in $srcdir"
fi

# Check header and footer files
if ! [ -e "$headerfile" ]; then
	die "Missing _header.html in $srcdir"
fi
if ! [ -e "$footerfile" ]; then
	die "Missing _footer.html in $srcdir"
fi

# Remove any junk from the domain eg. [https://]domain.com[/]
domain="$(printf '%s\n' "$2" | sed -e 's/^https*:\/\///' -e 's/\/$//')"
# Save the real url
url="$prefix://$domain"

# All intermediate files live in a private directory removed on any exit
workdir="$(mktemp -d)"
trap cleanup EXIT
trap 'exit 1' HUP INT TERM

srclist="$workdir/srclist"   # source pages, newest first
objlist="$workdir/objlist"   # matching html files in destdir
objdate="$workdir/objdate"   # matching modification times
linklist="$workdir/linklist" # matching public urls

rm -rf -- "${destdir:?}"
mkdir -p -- "$destdir"
cp -r -- "$srcdir"/* "$destdir"
rm -f -- "$destdir"/_header.html "$destdir"/_footer.html \
	"$destdir"/_metadata.html

# List the pages (md/html whose name does not start with '_'), ordered by
# modification time as reported by ls -t
find "$srcdir" -type f \( -name '*.md' -o -name '*.html' \) ! -name '_*' \
	-exec ls -1t {} + > "$srclist"

# Convert convertible files and record, line by line in the same order,
# the resulting html file, its source's modification time and its url
: > "$objlist"
: > "$objdate"
: > "$linklist"
while IFS= read -r srcfile; do
	rel=${srcfile#"$srcdir"/}
	# ISO 8601 timestamp, as required by the atom feed
	date -Is -r "$srcfile" >> "$objdate"
	convert "$destdir/$rel"
	case "$rel" in
	*.md) rel="${rel%md}html" ;;
	esac
	printf '%s\n' "$destdir/$rel" >> "$objlist"
	printf '%s\n' "$url/$rel" >> "$linklist"
done < "$srclist"

# Insert metadata into <head>
if ! [ "$SKIP_META" ] && [ -e "$metafile" ]; then
	find "$destdir" -name '*.html' |
	while IFS= read -r page; do
		sed -i "/<head>/r $metafile" "$page"
	done
fi

# Prepare index file list
if ! [ "$SKIP_LIST" ]; then
	pagelist="$workdir/pagelist.html"
	{
		# NOTE(review): the opening markup of this section was lost in
		# the copy under review; the ids below are a reconstruction,
		# confirm them against the site's stylesheet.
		printf '<div id="map">\n'
		printf '<h2 id="Pages">%s</h2>\n' "$linksec"
		while IFS= read -r link <&3 &&
			IFS= read -r page <&4 &&
			IFS= read -r mtime <&5; do
			case "$link" in
			*index.html*) continue ;;
			esac
			# only the date part of the timestamp is shown
			printf '<p><a href="%s">%s - %s</a></p>\n' \
				"$link" "${mtime%%T*}" "$(page_title "$page")"
		done 3< "$linklist" 4< "$objlist" 5< "$objdate"
		printf '</div>\n'
	} > "$pagelist"
	insert_before_body_end "$destdir"/index.html "$pagelist"
fi

# enclose article inside <article></article>
find "$destdir" -name '*.html' |
while IFS= read -r page; do
	sed -i -e '/<body>/a <article>' -e '/<\/body>/i </article>' "$page"
done

# Prepare the header
if ! [ "$SKIP_HEADER" ]; then
	fragment="$workdir/header.html"
	wrap_fragment header "$headerfile" "$fragment"
	find "$destdir" -name '*.html' |
	while IFS= read -r page; do
		sed -i "/<body>/r $fragment" "$page"
	done
fi

# Prepare the footer
if ! [ "$SKIP_FOOTER" ]; then
	fragment="$workdir/footer.html"
	wrap_fragment footer "$footerfile" "$fragment"
	find "$destdir" -name '*.html' |
	while IFS= read -r page; do
		insert_before_body_end "$page" "$fragment"
	done
fi

# Generate sitemap
if ! [ "$SKIP_SITEMAP" ]; then
	{
		printf '%s\n' '<?xml version="1.0" encoding="UTF-8"?>'
		printf '%s\n' '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
		while IFS= read -r link; do
			printf '\t<url><loc>%s</loc></url>\n' "$link"
		done < "$linklist"
		printf '%s\n' '</urlset>'
	} > "$destdir"/sitemap.xml
fi

# Generate atom feed
if ! [ "$SKIP_FEED" ]; then
	feed="$destdir"/atom.xml
	{
		# NOTE(review): the feed-level <subtitle>, <link> and <id>
		# elements are reconstructed from the variables the original
		# template expanded; confirm against a previously published feed.
		printf '%s\n' '<?xml version="1.0" encoding="utf-8"?>'
		printf '%s\n' '<feed xmlns="http://www.w3.org/2005/Atom">'
		printf '\t<title>%s</title>\n' "$blog_title"
		printf '\t<subtitle>%s</subtitle>\n' "$blog_desc"
		printf '\t<link href="%s/atom.xml" rel="self" />\n' "$url"
		printf '\t<link href="%s/" />\n' "$url"
		printf '\t<id>%s</id>\n' "$url"
		printf '\t<updated>%s</updated>\n' "$(date -Is)"
		count=0
		while IFS= read -r link <&3 &&
			IFS= read -r page <&4 &&
			IFS= read -r mtime <&5; do
			count=$((count + 1))
			# 0 means no limit on the number of entries
			if [ "$blog_nmax" -gt 0 ] && [ "$count" -gt "$blog_nmax" ]; then
				break
			fi
			printf '\t<entry>\n'
			printf '\t\t<title>%s</title>\n' "$(page_title "$page")"
			printf '\t\t<link href="%s" />\n' "$link"
			printf '\t\t<id>%s</id>\n' "$link"
			printf '\t\t<updated>%s</updated>\n' "$mtime"
			printf '\t</entry>\n'
		done 3< "$linklist" 4< "$objlist" 5< "$objdate"
		printf '</feed>\n'
	} > "$feed"
fi

exit 0