From 4c6f06b6b7947c254b5fc9b3df82ef31277e8bf7 Mon Sep 17 00:00:00 2001
From: Alessandro Mauri
Date: Sun, 21 Mar 2021 14:22:20 +0100
Subject: [PATCH] version 0.2

changes:
- rivet now generates an atom feed with the last 3 (configurable) modified
  articles
- rivet now generates links in the index list to html files in srcdir
- Articles with an unknown title will have their filename as the title
- minor code refactor
---
 TODO     |   5 --
 makefile |   2 +-
 rivet.1  |  11 ++++
 rivet.sh | 187 +++++++++++++++++++++++++++++++++++--------------------
 4 files changed, 133 insertions(+), 72 deletions(-)

diff --git a/TODO b/TODO
index 165a8d5..1cf1f05 100644
--- a/TODO
+++ b/TODO
@@ -1,15 +1,10 @@
 Improvements
 ============
 
-* Order article list by last modified
 * Add support for article description in list
-* Add support for html files in srcdir, currently they are being copied but no
-  link is generated
 * Convert links to present .md files to .html
 * Add support for a ignore list (.rivetignore) maybe per-folder
 * MORE COMMENTS
 
 Additions
 =========
-
-* Generate a RSS or Atom feed alonside the other stuff

diff --git a/makefile b/makefile
index 90842c1..099f7c9 100644
--- a/makefile
+++ b/makefile
@@ -1,4 +1,4 @@
-VERSION = 0.1
+VERSION = 0.2
 
 PREFIX = /usr/local
 MANPREFIX = ${PREFIX}/share/man

diff --git a/rivet.1 b/rivet.1
index 886fad1..6bf4e7a 100644
--- a/rivet.1
+++ b/rivet.1
@@ -42,6 +42,15 @@ Renames the "Pages" section in index.html to
 .I string
 be aware that altough the section name will be changed the id will remain
 "Pages"
+.IP "\-n string"
+Set the title of the atom feed to
+.I string
+.IP "\-d string"
+Set the description of the atom feed to
+.I string
+.IP "\-m number"
+Set the maximum number of elements in the atom feed to
+.I number
 .IP \-v
 verbose option, sets -x and prints every command as it runs
 .IP \-h
@@ -56,6 +65,8 @@ Disables the application of the user-supplied footer in
 Disables the generation of the "Pages" section in index.html
 .IP \-s
 Disables the sitemap generation
+.IP \-r
+Disables rss/atom feed generation
 .IP \-u
 Changes the default protocol used in links from https to http

diff --git a/rivet.sh b/rivet.sh
index 22c102a..da34d76
--- a/rivet.sh
+++ b/rivet.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/sh -e
 
 # Copyright (c) 2020 Alessandro Mauri
 #
@@ -27,29 +27,37 @@ unset SKIP_LIST
 unset VERBOSE
 unset PRINT_HELP
 unset SKIP_SITEMAP
+unset SKIP_FEED
 
 usage() {
 	printf "Usage: rivet [-hovelfsu] [-p string] [-o destdir] srcdir domain\n"
 	printf "\t-h: prints this message\n"
 	printf "\t-o [destdir]: specifies the output direcotory to be [destdir]\n"
-	printf "\t-p: [string]: Rename the \"Pages\" section to [string]\n"
+	printf "\t-p [string]: Rename the \"Pages\" section to [string]\n"
+	printf "\t-n [string]: Set the title of the atom feed\n"
+	printf "\t-d [string]: Set the description of the atom feed\n"
+	printf "\t-m [number]: Set the max number of elements in the atom feed\n"
 	printf "\t-v: Makes the script verbose\n"
 	printf "\t-e: Do not prepend _header.html to .html files\n"
 	printf "\t-f: Do not prepend _footer.html to .html files\n"
 	printf "\t-l: Do not generate \"Pages\" section in index.html\n"
 	printf "\t-s: Do not generate sitemap.xml\n"
+	printf "\t-r: Do not generate an atom feed\n"
 	printf "\t-u: Makes all references to the url 'http' instead of 'https'\n"
 	exit 2
 }
 
 convert() {
-	tmpfile="tmpconvfile.tmp"
 	infile="$1"
-	outfile="${infile%???}.html"
-	cp "$infile" "$tmpfile"
+	extension="${infile##*.}"
+	if [ "$extension" = 'md' ]; then
+		tmpfile="tmpconvfile.tmp"
+		outfile="${infile%md}html"
+		cp "$infile" "$tmpfile"
+		lowdown -s -Thtml -o "$outfile" "$tmpfile"
+		rm -f "$tmpfile" "$infile"
+	fi
 	# TODO: convert links to .md to .html
-	lowdown -s -Thtml -o "$outfile" "$tmpfile"
-	rm -f "$tmpfile" "$infile"
 }
 
 # Check dependencies
@@ -61,7 +69,10 @@ fi
 destdir='dst'
 prefix='https'
 linksec='Pages'
-while getopts 'o:vhelfsup:' c
+blog_title='Atom feed'
+blog_desc=''
+blog_nmax='3'
+while getopts 'o:vhelfsrup:n:d:m:' c
 do
 	case "$c" in
 	o) destdir=${OPTARG%%\/} ;;
@@ -71,8 +82,12 @@ do
 	l) SKIP_LIST=true ;;
 	f) SKIP_FOOTER=true ;;
 	s) SKIP_SITEMAP=true ;;
+	r) SKIP_FEED=true ;;
 	u) prefix='http' ;;
 	p) linksec="$OPTARG" ;;
+	n) blog_title="$OPTARG" ;;
+	d) blog_desc="$OPTARG" ;;
+	m) blog_nmax="$OPTARG" ;;
 	*) ;;
 	esac
 done
@@ -87,7 +102,7 @@ srcdir=${src%%\/}
 unset src
 headerfile=$srcdir/_header.html
 footerfile=$srcdir/_footer.html
-filelist=filelist.tmp
+objlist=objlist.tmp
 
 # Check if index.md is present
 if ! [ -e "$srcdir"/index.md ]; then
@@ -111,10 +126,10 @@ sed -i 's/<footer>//' "$footerfile"
 sed -i 's/<\/footer>//' "$footerfile"
 
 # Remove any junk from the domain eg. [https://]domain.com[/]
-url="$(echo "$2" |
-	sed -e 's/^https*:\/\///' |
-	sed -e 's/\/$//' |
-	sed -e 's/[]\/$*.^[]/\\&/g')"
+domain="$(echo "$2" | sed -e 's/^https*:\/\///' -e 's/\/$//')"
+
+# Save the real url
+url="$prefix"'://'"$domain"
 
 if [ "$PRINT_HELP" ]; then
 	usage
@@ -134,82 +149,122 @@ cp -r "$srcdir"/* "$destdir"
 rm -f "$destdir"/_header.html "$destdir"/_footer.html
 
 # Generate an ordered (by open time) file list
-find "$srcdir" -type f -name "*.md" -exec ls -1t {} + > "$filelist"
-sed -i "s,^\/*[^\/]*\/,$destdir/," "$filelist"
+find "$srcdir" -type f -regex '.*\/[^_].+\..*' -exec ls -1t {} + |
+	awk '/.*\.(md|html)$/' > "$objlist"
+sed -i -e "s,^\/*[^\/]*\/,$destdir/," "$objlist"
 
 # Convert markdown files
 while IFS="" read -r file; do
 	convert "$file"
-done < "$filelist"
-sed -i 's/\.md$/\.html/' "$filelist"
+done < "$objlist"
+
+# Convert the file list to a list that contains the path of all the html files
+sed -i -e 's/\.md$/\.html/' "$objlist"
+
+# Create a list that contains the links to all html files
+linklist=linklist.tmp
+cp -f "$objlist" "$linklist"
+sed -i -e "s/^$destdir//" -e "s/^/$prefix:\/\/$domain/" "$linklist"
 
 # Prepare the header
 if ! [ "$SKIP_HEADER" ]; then
 	find "$destdir" -name "*.html" |
 	while IFS="" read -r file; do
-		sed -i "/<body>/r $headerfile" "$file"
+	sed -i "/<body>/r $headerfile" "$file"
 	done
 fi
 
 # Prepate the footer
 if ! [ "$SKIP_FOOTER" ]; then
-	tmpfoot="tmpfootfile.tmp"
-	cp "$footerfile" "$tmpfoot"
-	sed -i '1s/^/<footer>\n/' "$tmpfoot"
-	echo '</footer>' >> "$tmpfoot"
-	find "$destdir" -name "*.html" |
-	while IFS="" read -r file; do
-		sed -i "/<\/body>/r $tmpfoot" "$file"
-	done
-	rm -f "$tmpfoot"
+tmpfoot="tmpfootfile.tmp"
+cp "$footerfile" "$tmpfoot"
+sed -i '1s/^/<footer>\n/' "$tmpfoot"
+echo '</footer>' >> "$tmpfoot"
+find "$destdir" -name "*.html" |
+while IFS="" read -r file; do
	sed -i "/<\/body>/r $tmpfoot" "$file"
+done
+rm -f "$tmpfoot"
 fi
 
-# Prepare the sitemap & file list
-if ! [ "$SKIP_SITEMAP" ] || ! [ "$SKIP_LIST" ]; then
-	linklist="linklist.tmp"
-	rm -f "$linklist" "$destdir"/sitemap.xml
-	while IFS="" read -r file; do
-		echo "${file#$destdir/}" >> "$linklist"
-	done < "$filelist"
-
-	if ! [ "$SKIP_LIST" ]; then
-		tmpfile="linkindex.tmp"
-		rm -f "$tmpfile"
-		cat << EOF >> "$tmpfile"
+# Prepare index file list
+if ! [ "$SKIP_LIST" ]; then
+	tmpfile="linkindex.tmp"
+	rm -f "$tmpfile"
+	cat << EOF >> "$tmpfile"
 <div id="Pages">
 <h1>$linksec</h1>
 
 EOF
-		while IFS="" read -r line; do
-			if echo "$line" | grep -q 'index\.html'; then
-				continue
-			fi
-			title="$(grep -e '^<title>.*<\/title>' "$destdir"/"$line" |
-				sed -e 's/<title>//' -e 's/<\/title>//')"
-			if ! [ "$title" ] || echo "$title" | grep -q 'Untitled article'; then
-				title=${line%?????}
-			fi
-			printf "<p><a href=\"%s\">%s</a></p>\n" "$line" "$title" >> "$tmpfile"
-		done < "$linklist"
-		echo '</div>' >> "$tmpfile"
-		sed -i '/<\/body>/i REPLACE' "$destdir"/index.html
-		sed -i "/^REPLACE/r $tmpfile" "$destdir"/index.html
-		sed -i 's/^REPLACE//' "$destdir"/index.html
-		rm -f "$tmpfile"
-	fi
+	count='0'
+	while IFS="" read -r line; do
+		count=$((count + 1))
+		if echo "$line" | grep -q 'index\.html'; then
+			continue
+		fi
+		tfile="$(awk "NR==$count" "$objlist")"
+		title="$(sed -E -n 's/.*<title>\s*(.+)\s*<\/title>.*/\1/p' "$tfile")"
+		if [ -z "$title" ]; then
+			title="${tfile##*/}"
+		fi
+		printf "<p><a href=\"%s\">%s</a></p>\n" "$line" "$title" >> "$tmpfile"
+	done < "$linklist"
+	echo '</div>' >> "$tmpfile"
+	sed -i '/<\/body>/i REPLACE' "$destdir"/index.html
+	sed -i "/^REPLACE/r $tmpfile" "$destdir"/index.html
+	sed -i 's/^REPLACE//' "$destdir"/index.html
+	rm -f "$tmpfile"
+fi
 
-	if ! [ "$SKIP_SITEMAP" ]; then
-		sed -i -e "s/^/$prefix:\/\/$url\//" "$linklist"
-		cat << EOF >> "$destdir"/sitemap.xml
+# Generate sitemap
+if ! [ "$SKIP_SITEMAP" ]; then
+	cat << EOF >> "$destdir"/sitemap.xml
 <?xml version="1.0" encoding="UTF-8"?>
 <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
 </urlset>
 EOF
-		while IFS="" read -r line; do
-			sed -i "/<\/urlset>/i \
-			<url><loc>$line<\/loc><\/url>" "$destdir"/sitemap.xml
-		done < "$linklist"
-		sed -i 's/^<url>/\t<url>/' "$destdir"/sitemap.xml
-	fi
-	rm -f "$linklist" "$filelist"
+	while IFS="" read -r line; do
+		sed -i "/<\/urlset>/i \
+		<url><loc>$line<\/loc><\/url>" "$destdir"/sitemap.xml
+	done < "$linklist"
+	sed -i 's/^<url>/\t<url>/' "$destdir"/sitemap.xml
 fi
 
+# Generate atom feed
+if ! [ "$SKIP_FEED" ]; then
+	feed="$destdir"/atom.xml
+	cat << EOF >> "$feed"
+<?xml version="1.0" encoding="utf-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom">
+	<title>$blog_title</title>
+	<subtitle>$blog_desc</subtitle>
+	<link href="$url/atom.xml" rel="self"/>
+	<link href="$url"/>
+	<id>$url</id>
+	<updated>$(date -Is)</updated>
+EOF
+	count='0'
+	while IFS="" read -r line; do
+		count=$((count + 1))
+		if [ $count -gt "$blog_nmax" ]; then
+			break
+		fi
+		tfile="$(awk "NR==$count" "$objlist")"
+		title="$(sed -E -n 's/.*<title>\s*(.+)\s*<\/title>.*/\1/p' "$tfile")"
+		if [ -z "$title" ]; then
+			title="${tfile##*/}"
+		fi
+		ldate="$(stat -c '%y' "$tfile")"
+		{
+			printf '\t<entry>\n'
+			printf '\t\t<title>%s</title>\n' "$title"
+			printf '\t\t<link href="%s"/>\n' "$line"
+			printf '\t\t<updated>%s</updated>\n' "$ldate"
+			printf '\t</entry>\n'
+		} >> "$feed"
+	done < "$linklist"
+
+	printf '</feed>\n' >> "$feed"
+fi
+
+rm -f "$objlist" "$linklist"
+
 exit
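
Editor's note, not part of the patch: a usage sketch of the 0.2 feed options.
The command names the new flags added above; the site title, description,
paths, and domain are illustrative only, and the printed output assumes the
feed heredoc as reconstructed above.

	# build src/ into dst/, publishing a feed with at most 5 entries
	$ rivet -n "Example blog" -d "Assorted notes" -m 5 -o dst src example.com
	# the generated feed starts with the feed metadata from the heredoc
	$ head -n 3 dst/atom.xml
	<?xml version="1.0" encoding="utf-8"?>
	<feed xmlns="http://www.w3.org/2005/Atom">
		<title>Example blog</title>

Passing -r instead skips atom.xml entirely, mirroring -s for sitemap.xml.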