From 4c6f06b6b7947c254b5fc9b3df82ef31277e8bf7 Mon Sep 17 00:00:00 2001
From: Alessandro Mauri
Date: Sun, 21 Mar 2021 14:22:20 +0100
Subject: [PATCH] version 0.2

changes:
- rivet now generates an atom feed with the last 3 (configurable) modified
  articles
- rivet now generates links in the index list to html files in srcdir
- Articles with an unknown title will have their filename as the title
- minor code refactor
---
 TODO     |   5 --
 makefile |   2 +-
 rivet.1  |  11 ++++
 rivet.sh | 187 +++++++++++++++++++++++++++++++++++--------------------
 4 files changed, 133 insertions(+), 72 deletions(-)

diff --git a/TODO b/TODO
index 165a8d5..1cf1f05 100644
--- a/TODO
+++ b/TODO
@@ -1,15 +1,10 @@
 Improvements
 ============
 
-* Order article list by last modified
 * Add support for article description in list
-* Add support for html files in srcdir, currently they are being copied but no
-  link is generated
 * Convert links to present .md files to .html
 * Add support for a ignore list (.rivetignore) maybe per-folder
 * MORE COMMENTS
 
 Additions
 =========
-
-* Generate a RSS or Atom feed alonside the other stuff

diff --git a/makefile b/makefile
index 90842c1..099f7c9 100644
--- a/makefile
+++ b/makefile
@@ -1,4 +1,4 @@
-VERSION = 0.1
+VERSION = 0.2
 
 PREFIX = /usr/local
 MANPREFIX = ${PREFIX}/share/man

diff --git a/rivet.1 b/rivet.1
index 886fad1..6bf4e7a 100644
--- a/rivet.1
+++ b/rivet.1
@@ -42,6 +42,15 @@ Renames the "Pages" section in index.html to
 .I string
 be aware that altough the section name will be changed the id will remain
 "Pages"
+.IP "\-n string"
+Set the title of the atom feed to
+.I string
+.IP "\-d string"
+Set the description of the atom feed to
+.I string
+.IP "\-m number"
+Set the maximum number of elements in the atom feed to
+.I number
 .IP \-v
 verbose option, sets -x and prints every command as it runs
 .IP \-h
@@ -56,6 +65,8 @@ Disables the application of the user-supplied footer in
 Disables the generation of the "Pages" section in index.html
 .IP \-s
 Disables the sitemap generation
+.IP \-r
+Disables rss/atom feed generation
 .IP \-u
 Changes the default protocol used in links from https to http

diff --git a/rivet.sh b/rivet.sh
index 22c102a..da34d76
--- a/rivet.sh
+++ b/rivet.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/sh -e
 
 # Copyright (c) 2020 Alessandro Mauri
 #
@@ -27,29 +27,37 @@ unset SKIP_LIST
 unset VERBOSE
 unset PRINT_HELP
 unset SKIP_SITEMAP
+unset SKIP_FEED
 
 usage() {
 	printf "Usage: rivet [-hovelfsu] [-p string] [-o destdir] srcdir domain\n"
 	printf "\t-h: prints this message\n"
 	printf "\t-o [destdir]: specifies the output direcotory to be [destdir]\n"
-	printf "\t-p: [string]: Rename the \"Pages\" section to [string]\n"
+	printf "\t-p [string]: Rename the \"Pages\" section to [string]\n"
+	printf "\t-n [string]: Set the title of the atom feed\n"
+	printf "\t-d [string]: Set the description of the atom feed\n"
+	printf "\t-m [number]: Set the max number of elements in the atom feed\n"
 	printf "\t-v: Makes the script verbose\n"
 	printf "\t-e: Do not prepend _header.html to .html files\n"
 	printf "\t-f: Do not prepend _footer.html to .html files\n"
 	printf "\t-l: Do not generate \"Pages\" section in index.html\n"
 	printf "\t-s: Do not generate sitemap.xml\n"
+	printf "\t-r: Do not generate an atom feed\n"
 	printf "\t-u: Makes all references to the url 'http' instead of 'https'\n"
 	exit 2
 }
 
 convert() {
-	tmpfile="tmpconvfile.tmp"
 	infile="$1"
-	outfile="${infile%???}.html"
-	cp "$infile" "$tmpfile"
+	extension="${infile##*.}"
+	if [ "$extension" = 'md' ]; then
+		tmpfile="tmpconvfile.tmp"
+		outfile="${infile%md}html"
+		cp "$infile" "$tmpfile"
+		lowdown -s -Thtml -o "$outfile" "$tmpfile"
+		rm -f "$tmpfile" "$infile"
+	fi
 	# TODO: convert links to .md to .html
-	lowdown -s -Thtml -o "$outfile" "$tmpfile"
-	rm -f "$tmpfile" "$infile"
 }
 
 # Check dependencies
@@ -61,7 +69,10 @@ fi
 destdir='dst'
 prefix='https'
 linksec='Pages'
-while getopts 'o:vhelfsup:' c
+blog_title='Atom feed'
+blog_desc=''
+blog_nmax='3'
+while getopts 'o:vhelfsrup:n:d:m:' c
 do
 	case "$c" in
 	o) destdir=${OPTARG%%\/} ;;
@@ -71,8 +82,12 @@ do
 	l) SKIP_LIST=true ;;
 	f) SKIP_FOOTER=true ;;
 	s) SKIP_SITEMAP=true ;;
+	r) SKIP_FEED=true ;;
 	u) prefix='http' ;;
 	p) linksec="$OPTARG" ;;
+	n) blog_title="$OPTARG" ;;
+	d) blog_desc="$OPTARG" ;;
+	m) blog_nmax="$OPTARG" ;;
 	*) ;;
 	esac
 done
@@ -87,7 +102,7 @@ srcdir=${src%%\/}
 unset src
 headerfile=$srcdir/_header.html
 footerfile=$srcdir/_footer.html
-filelist=filelist.tmp
+objlist=objlist.tmp
 
 # Check if index.md is present
 if ! [ -e "$srcdir"/index.md ]; then
@@ -111,10 +126,10 @@ sed -i 's/<footer>//' "$footerfile"
 sed -i 's/<\/footer>//' "$footerfile"
 
 # Remove any junk from the domain eg. [https://]domain.com[/]
-url="$(echo "$2" |
-	sed -e 's/^https*:\/\///' |
-	sed -e 's/\/$//' |
-	sed -e 's/[]\/$*.^[]/\\&/g')"
+domain="$(echo "$2" | sed -e 's/^https*:\/\///' -e 's/\/$//')"
+
+# Save the real url
+url="$prefix"'://'"$domain"
 
 if [ "$PRINT_HELP" ]; then
 	usage
@@ -134,82 +149,122 @@ cp -r "$srcdir"/* "$destdir"
 rm -f "$destdir"/_header.html "$destdir"/_footer.html
 
 # Generate an ordered (by open time) file list
-find "$srcdir" -type f -name "*.md" -exec ls -1t {} + > "$filelist"
-sed -i "s,^\/*[^\/]*\/,$destdir/," "$filelist"
+find "$srcdir" -type f -regex '.*\/[^_].+\..*' -exec ls -1t {} + |
+	awk '/.*\.(md|html)$/' > "$objlist"
+sed -i -e "s,^\/*[^\/]*\/,$destdir/," "$objlist"
 
 # Convert markdown files
 while IFS="" read -r file; do
 	convert "$file"
-done < "$filelist"
-sed -i 's/\.md$/\.html/' "$filelist"
+done < "$objlist"
+
+# Convert the file list to a list that contains the path of all the html files
+sed -i -e 's/\.md$/\.html/' "$objlist"
+
+# Create a list that contains the links to all html files
+linklist=linklist.tmp
+cp -f "$objlist" "$linklist"
+sed -i -e "s/^$destdir//" -e "s/^/$prefix:\/\/$domain/" "$linklist"
 
 # Prepare the header
 if ! [ "$SKIP_HEADER" ]; then
 	find "$destdir" -name "*.html" |
 	while IFS="" read -r file; do
-		sed -i "/<body>/r $headerfile" "$file"
+	sed -i "/<body>/r $headerfile" "$file"
 	done
 fi
 
 # Prepate the footer
 if ! [ "$SKIP_FOOTER" ]; then
-	tmpfoot="tmpfootfile.tmp"
-	cp "$footerfile" "$tmpfoot"
-	sed -i '1s/^/<footer>\n/' "$tmpfoot"
-	echo '</footer>' >> "$tmpfoot"
-	find "$destdir" -name "*.html" |
-	while IFS="" read -r file; do
-		sed -i "/<\/body>/r $tmpfoot" "$file"
-	done
-	rm -f "$tmpfoot"
+tmpfoot="tmpfootfile.tmp"
+cp "$footerfile" "$tmpfoot"
+sed -i '1s/^/<footer>\n/' "$tmpfoot"
+echo '</footer>' >> "$tmpfoot"
+find "$destdir" -name "*.html" |
+while IFS="" read -r file; do
	sed -i "/<\/body>/r $tmpfoot" "$file"
+done
+rm -f "$tmpfoot"
 fi
 
-# Prepare the sitemap & file list
-if ! [ "$SKIP_SITEMAP" ] || ! [ "$SKIP_LIST" ]; then
-	linklist="linklist.tmp"
-	rm -f "$linklist" "$destdir"/sitemap.xml
-	while IFS="" read -r file; do
-		echo "${file#$destdir/}" >> "$linklist"
-	done < "$filelist"
-
-	if ! [ "$SKIP_LIST" ]; then
-		tmpfile="linkindex.tmp"
-		rm -f "$tmpfile"
-		cat << EOF >> "$tmpfile"
+# Prepare index file list
+if ! [ "$SKIP_LIST" ]; then
+	tmpfile="linkindex.tmp"
+	rm -f "$tmpfile"
+	cat << EOF >> "$tmpfile"
 <div id="Pages">
 <h1>$linksec</h1>
 
 EOF
-		while IFS="" read -r line; do
-			if echo "$line" | grep -q 'index\.html'; then
-				continue
-			fi
-			title="$(grep -e '^<title>.*<\/title>' "$destdir"/"$line" |
-				sed -e 's/<title>//' -e 's/<\/title>//')"
-			if ! [ "$title" ] || echo "$title" | grep -q 'Untitled article'; then
-				title=${line%?????}
-			fi
-			printf "<p><a href=\"%s\">%s</a></p>\n" "$line" "$title" >> "$tmpfile"
-		done < "$linklist"
-		echo '</div>' >> "$tmpfile"
-		sed -i '/<\/body>/i REPLACE' "$destdir"/index.html
-		sed -i "/^REPLACE/r $tmpfile" "$destdir"/index.html
-		sed -i 's/^REPLACE//' "$destdir"/index.html
-		rm -f "$tmpfile"
-	fi
+	count='0'
+	while IFS="" read -r line; do
+		count=$((count + 1))
+		if echo "$line" | grep -q 'index\.html'; then
+			continue
+		fi
+		tfile="$(awk "NR==$count" "$objlist")"
+		title="$(sed -E -n 's/.*<title>\s*(.+)\s*<\/title>.*/\1/p' "$tfile")"
+		if [ -z "$title" ]; then
+			title="${tfile##*/}"
+		fi
+		printf "<p><a href=\"%s\">%s</a></p>\n" "$line" "$title" >> "$tmpfile"
+	done < "$linklist"
+	echo '</div>' >> "$tmpfile"
+	sed -i '/<\/body>/i REPLACE' "$destdir"/index.html
+	sed -i "/^REPLACE/r $tmpfile" "$destdir"/index.html
+	sed -i 's/^REPLACE//' "$destdir"/index.html
+	rm -f "$tmpfile"
+fi
 
-	if ! [ "$SKIP_SITEMAP" ]; then
-		sed -i -e "s/^/$prefix:\/\/$url\//" "$linklist"
-		cat << EOF >> "$destdir"/sitemap.xml
+# Generate sitemap
+if ! [ "$SKIP_SITEMAP" ]; then
+	cat << EOF >> "$destdir"/sitemap.xml
 <?xml version="1.0" encoding="UTF-8"?>
 <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
 </urlset>
 EOF
-		while IFS="" read -r line; do
-			sed -i "/<\/urlset>/i \
-			<url><loc>$line<\/loc><\/url>" "$destdir"/sitemap.xml
-		done < "$linklist"
-		sed -i 's/^<url>/\t<url>/' "$destdir"/sitemap.xml
-	fi
-	rm -f "$linklist" "$filelist"
+	while IFS="" read -r line; do
+		sed -i "/<\/urlset>/i \
+		<url><loc>$line<\/loc><\/url>" "$destdir"/sitemap.xml
+	done < "$linklist"
+	sed -i 's/^<url>/\t<url>/' "$destdir"/sitemap.xml
 fi
 
+# Generate atom feed
+if ! [ "$SKIP_FEED" ]; then
+	feed="$destdir"/atom.xml
+	cat << EOF >> "$feed"
+<?xml version="1.0" encoding="utf-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom">
+	<title>$blog_title</title>
+	<subtitle>$blog_desc</subtitle>
+	<link href="$url/atom.xml" rel="self"/>
+	<link href="$url"/>
+	<id>$url</id>
+	<updated>$(date -Is)</updated>
+EOF
+	count='0'
+	while IFS="" read -r line; do
+		count=$((count + 1))
+		if [ $count -gt "$blog_nmax" ]; then
+			break
+		fi
+		tfile="$(awk "NR==$count" "$objlist")"
+		title="$(sed -E -n 's/.*<title>\s*(.+)\s*<\/title>.*/\1/p' "$tfile")"
+		if [ -z "$title" ]; then
+			title="${tfile##*/}"
+		fi
+		ldate="$(stat -c '%y' "$tfile")"
+		{
+			printf '\t<entry>\n'
+			printf '\t\t<title>%s</title>\n' "$title"
+			printf '\t\t<link href="%s"/>\n' "$line"
+			printf '\t\t<updated>%s</updated>\n' "$ldate"
+			printf '\t</entry>\n'
+		} >> "$feed"
+	done < "$linklist"
+
+	printf '</feed>\n' >> "$feed"
+fi
+
+rm -f "$objlist" "$linklist"
+
 exit
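
Editor's note, not part of the patch: a usage sketch of the 0.2 feed options.
The command names the new flags added above; the site title, description,
paths, and domain are illustrative only, and the printed output assumes the
feed heredoc as reconstructed above.

	# build src/ into dst/, publishing a feed with at most 5 entries
	$ rivet -n "Example blog" -d "Assorted notes" -m 5 -o dst src example.com
	# the generated feed starts with the feed metadata from the heredoc
	$ head -n 3 dst/atom.xml
	<?xml version="1.0" encoding="utf-8"?>
	<feed xmlns="http://www.w3.org/2005/Atom">
		<title>Example blog</title>

Passing -r instead skips atom.xml entirely, mirroring -s for sitemap.xml.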