#!/bin/sh

# Copyright (c) 2021 Alessandro Mauri
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Abort on the first command that fails.
set -e

# Drop any option flags inherited from the environment so that only the
# command-line switches parsed below can turn them on.
unset SKIP_FOOTER SKIP_HEADER SKIP_META SKIP_LIST
unset VERBOSE PRINT_HELP SKIP_SITEMAP SKIP_FEED

# usage
#
# Print the command synopsis and the per-option help on stdout, then
# terminate the script with status 2. This function never returns.
usage() {
	printf 'Usage: rivet [-hvelfsurt] [-p string] [-o destdir] [-m number] [-n string] [-d string] srcdir domain\n'
	printf '\t-h: prints this message\n'
	printf '\t-o [destdir]: specifies the output directory to be [destdir]\n'
	printf '\t-p [string]: Rename the "Pages" section to [string]\n'
	printf '\t-n [string]: Set the title of the atom feed\n'
	printf '\t-d [string]: Set the description of the atom feed\n'
	printf '\t-m [number]: Set the max number of elements in the atom feed,\n'
	printf '\t\t0 to include all files\n'
	printf '\t-v: Makes the script verbose\n'
	printf '\t-e: Do not prepend _header.html to .html files\n'
	printf '\t-l: Do not generate "Pages" section in index.html\n'
	printf '\t-f: Do not prepend _footer.html to .html files\n'
	printf '\t-s: Do not generate sitemap.xml\n'
	printf "\t-u: Makes all references to the url 'http' instead of 'https'\n"
	printf '\t-r: Do not generate an atom feed\n'
	# The final line needs its newline too, otherwise the caller's prompt
	# ends up glued to the help text.
	printf '\t-t: Do not insert contents of _metadata.html\n'
	exit 2
}

# convert FILE
#
# When FILE has the extension "md", run lowdown on a scratch copy of it to
# produce FILE with "md" replaced by "html", then delete both the scratch
# copy and FILE. Any other file is left untouched.
#
# Returns 0 on success (or when there was nothing to do) and 1 when the
# scratch copy or the lowdown run fails; in that case FILE is kept and the
# scratch copy is removed.
# Plain sh has no local variables, so this sets the globals infile and
# extension, plus outfile and tmpfile for Markdown input.
convert() {
	infile="$1"
	extension="${infile##*.}"
	if [ "$extension" != 'md' ]; then
		return 0
	fi

	outfile="${infile%md}html"
	tmpfile="$(mktemp)" || return 1
	if ! cp "$infile" "$tmpfile" ||
		! lowdown --html-no-skiphtml --html-no-escapehtml -s -Thtml \
			-o "$outfile" "$tmpfile"; then
		# Do not leave the scratch copy behind when the conversion fails.
		rm -f "$tmpfile"
		return 1
	fi
	rm -f "$tmpfile" "$infile"
	# TODO: convert links to .md to .html
}

# Check dependencies: lowdown is the program convert() runs on .md files.
# The diagnostic goes to stderr so it cannot be mistaken for regular output.
if ! command -v lowdown > /dev/null 2>&1; then
	echo "lowdown is not installed" >&2
	exit 1
fi

# Defaults, overridable from the command line.
destdir='dst'          # -o: output directory
prefix='https'         # -u switches the URL scheme to http
linksec='Pages'        # -p: heading of the page list in index.html
blog_title='Atom feed' # -n: feed title
blog_desc=''           # -d: feed subtitle
blog_nmax='0'          # -m: max feed entries, 0 means no limit

# parse_options ARGS...
#
# Apply the command-line switches in ARGS to the settings above and to the
# SKIP_*/VERBOSE/PRINT_HELP flags. OPTIND is left pointing at the first
# non-option argument so the caller can shift the options away.
# An unknown switch, a missing option argument or a non-numeric -m value
# ends the script through usage().
parse_options() {
	OPTIND=1
	while getopts 'o:vhelfsrup:n:d:m:t' c; do
		case "$c" in
		o) destdir=${OPTARG%%/} ;;
		v) VERBOSE=true ;;
		h) PRINT_HELP=true ;;
		e) SKIP_HEADER=true ;;
		l) SKIP_LIST=true ;;
		f) SKIP_FOOTER=true ;;
		s) SKIP_SITEMAP=true ;;
		r) SKIP_FEED=true ;;
		u) prefix='http' ;;
		p) linksec="$OPTARG" ;;
		n) blog_title="$OPTARG" ;;
		d) blog_desc="$OPTARG" ;;
		m)
			# The value is later compared with -gt, so it must be a
			# plain non-negative integer.
			case "$OPTARG" in
			'' | *[!0-9]*)
				echo "Invalid value for -m: $OPTARG" >&2
				usage
				;;
			esac
			blog_nmax="$OPTARG"
			;;
		t) SKIP_META=true ;;
		*) usage ;;
		esac
	done
}

parse_options "$@"
shift $((OPTIND - 1))

# Serve -h before looking at the positional arguments, so that "rivet -h" on
# its own prints the help instead of complaining about missing arguments.
if [ "$PRINT_HELP" ]; then
	# usage() exits with status 2 by itself; running it in a subshell lets
	# an explicit help request finish successfully.
	(usage) || true
	exit 0
fi

if [ -z "$1" ] || [ -z "$2" ]; then
	echo "Not enough arguments" >&2
	usage
fi

# Source directory (first positional argument, one trailing slash removed)
# and the template fragments expected inside it.
srcdir=${1%%/}
headerfile=$srcdir/_header.html
footerfile=$srcdir/_footer.html
metafile=$srcdir/_metadata.html

# Scratch files shared by the rest of the script:
#   objlist - list of page paths, one per line
#   objdate - modification timestamps, line N belongs to line N of objlist
#   tmpsed  - staging file for "sed ... > tmp; mv tmp file" edits
objlist="$(mktemp)"
objdate="$(mktemp)"
tmpsed="$(mktemp)"

# cleanup_tmp: remove the three scratch files created above.
cleanup_tmp() {
	rm -f "$objlist" "$objdate" "$tmpsed"
}
# Run the cleanup on every exit path, including set -e aborts, and turn the
# usual termination signals into a regular exit so the EXIT trap fires.
trap cleanup_tmp EXIT
trap 'exit 1' HUP INT TERM

# The site needs an index page and the three template fragments; stop with
# a diagnostic on stderr as soon as one of them is missing from the source
# directory.
for required in index.md _header.html _footer.html _metadata.html; do
	if ! [ -e "$srcdir/$required" ]; then
		echo "Missing $required in $srcdir" >&2
		exit 1
	fi
done

# strip_wrapper TAG FILE
#
# Remove the opening <TAG ...> and the closing </TAG> from FILE in place
# (first occurrence on each line). The opening tag is matched up to its own
# ">" only, so content sharing a line with the tag survives. The result is
# written back with a redirection rather than mv, so FILE keeps its
# permissions. Uses the global scratch file $tmpsed.
strip_wrapper() {
	sed -e "s/<$1[^>]*>//" -e "s/<\/$1>//" "$2" > "$tmpsed"
	cat "$tmpsed" > "$2"
}

# Remove junk from {header,footer} files
strip_wrapper header "$headerfile"
strip_wrapper footer "$footerfile"

# strip_domain STRING
#
# Print STRING without a leading "http://" or "https://" and without one
# trailing slash, e.g. [https://]domain.com[/] becomes domain.com.
# printf is used instead of echo so that input such as "-n" is passed on
# verbatim.
strip_domain() {
	printf '%s\n' "$1" | sed -e 's/^https*:\/\///' -e 's/\/$//'
}

# Remove any junk from the domain given as second positional argument
domain="$(strip_domain "$2")"

# Save the real url
url="${prefix}://${domain}"

# -h: print the help text and stop.
if [ "$PRINT_HELP" ]; then
	# usage() exits with status 2 on its own, which made the "exit 0" that
	# follows it unreachable. Run it in a subshell so that an explicit help
	# request terminates successfully.
	(usage) || true
	exit 0
fi

# -v: trace every command from here on.
if [ "$VERBOSE" ]; then
	set -x
fi

# The source must be an existing directory.
if ! [ -d "$srcdir" ]; then
	echo "Error: missing source directory" >&2
	usage
	# usage() exits by itself; this is only a safety net.
	exit 1
fi

# Rebuild the output tree from scratch as a copy of the source tree, minus
# the template fragments.
# The output directory is wiped first, so refuse to run when it is the
# source directory itself or when its name came out empty (e.g. "-o /").
if [ "$destdir" = "$srcdir" ]; then
	echo "Error: output directory must differ from the source directory" >&2
	exit 1
fi
rm -rf -- "${destdir:?output directory must not be empty}"
mkdir -p -- "$destdir"
cp -r -- "$srcdir"/* "$destdir"
rm -f -- "$destdir"/_header.html "$destdir"/_footer.html "$destdir"/_metadata.html

# list_pages SRCDIR
#
# Print the .md and .html files found under SRCDIR, one path per line, in
# the order "ls -t" gives them (most recently modified first).
# The name filter is applied to the part of the path below SRCDIR, so it
# behaves the same whether SRCDIR is "src", "./src" or "/a/b/src": files
# directly inside SRCDIR whose name starts with "_" (the template
# fragments) are left out.
list_pages() {
	find "$1" -type f -exec ls -1t {} + |
		awk -v src="$1" '
			BEGIN { sub(/\/+$/, "", src) }
			index($0, src "/") == 1 &&
				substr($0, length(src) + 1) ~ /\/[^_].+\.(md|html)$/
		'
}

# retarget_paths SRCDIR DESTDIR
#
# Filter: copy stdin to stdout with the leading SRCDIR of every line
# replaced by DESTDIR. Both are treated as literal strings, so they may
# contain slashes, commas or regex metacharacters.
retarget_paths() {
	awk -v src="$1" -v dst="$2" '
		BEGIN { sub(/\/+$/, "", src) }
		{ print dst substr($0, length(src) + 1) }
	'
}

# Generate a file list ordered by modification time
list_pages "$srcdir" > "$objlist"

# Record the modification time of every listed file; line N of objdate
# belongs to line N of objlist. Truncating first guarantees the file exists
# even when the list is empty.
: > "$objdate"
while IFS= read -r file; do
	stat -c '%y' "$file" >> "$objdate"
done < "$objlist"

# From here on the list refers to the copies inside the output directory.
retarget_paths "$srcdir" "$destdir" < "$objlist" > "$tmpsed"
mv "$tmpsed" "$objlist"

# Convert convertible files: convert() turns every listed .md file into an
# .html file next to it and ignores the rest.
while IFS= read -r file; do
	convert "$file"
done < "$objlist"

# Convert the file list to a list that contains the path of all the html
# files, i.e. rename the .md entries to the .html files produced above.
sed -e 's/\.md$/.html/' "$objlist" > "$tmpsed"
mv "$tmpsed" "$objlist"

# strip_destdir DESTDIR
#
# Filter: copy stdin to stdout with a leading DESTDIR removed from every
# line. DESTDIR is compared as a literal string, so an output directory
# containing "/" or "." is handled correctly.
strip_destdir() {
	awk -v dst="$1" '
		index($0, dst) == 1 { $0 = substr($0, length(dst) + 1) }
		{ print }
	'
}

# Create a list that contains the links to all html files: the paths from
# objlist made relative to the site root ("dst/a.html" becomes "/a.html").
linklist="$(mktemp)"
strip_destdir "$destdir" < "$objlist" > "$linklist"

# Insert metadata into <head>: unless -t was given, append the contents of
# _metadata.html after every line containing "<head>" in each .html file of
# the output tree.
if [ -z "$SKIP_META" ]; then
	find "$destdir" -name "*.html" |
		while IFS= read -r file; do
			sed "/<head>/r $metafile" "$file" > "$tmpsed"
			mv "$tmpsed" "$file"
		done
fi

# Prepare index file list: unless -l was given, build a <div id="map"> block
# with one link per page and insert it just before </body> in index.html.
if [ -z "$SKIP_LIST" ]; then
	tmpfile="$(mktemp)"
	{
		printf '<div id="map">\n<h2 id="Pages">%s</h2>\n' "$linksec"
		# linklist, objlist and objdate describe the same pages line by
		# line, so read them in lockstep instead of looking up line N of
		# each file again for every page.
		while IFS= read -r line <&3 &&
			IFS= read -r tfile <&4 &&
			IFS= read -r ldate <&5; do
			# Index pages do not list themselves.
			case "$line" in
			*index.html*) continue ;;
			esac
			# Use the page's <title>, falling back to its file name.
			title="$(sed -E -n 's/.*<title>\s*(.+)\s*<\/title>.*/\1/p' "$tfile")"
			if [ -z "$title" ]; then
				title="${tfile##*/}"
			fi
			# Only the date part of the timestamp is shown.
			printf '<p><a href="%s">%s - %s</a></p>\n' \
				"$line" "${ldate%%[[:space:]]*}" "$title"
		done 3< "$linklist" 4< "$objlist" 5< "$objdate"
		echo '</div>'
	} > "$tmpfile"

	# Put a marker line in front of </body>, read the block in after the
	# marker, then blank the marker.
	indexfile="$destdir"/index.html
	sed '/<\/body>/i REPLACE' "$indexfile" > "$tmpsed"
	mv "$tmpsed" "$indexfile"
	sed "/^REPLACE/r $tmpfile" "$indexfile" > "$tmpsed"
	mv "$tmpsed" "$indexfile"
	sed 's/^REPLACE//' "$indexfile" > "$tmpsed"
	mv "$tmpsed" "$indexfile"
	rm -f "$tmpfile"
fi

# Enclose article inside <article></article>: in every .html file of the
# output tree add an <article> line after the <body> line and an </article>
# line before the </body> line.
find "$destdir" -name "*.html" |
	while IFS= read -r file; do
		sed -e '/<body>/a <article>' -e '/<\/body>/i </article>' "$file" > "$tmpsed"
		mv "$tmpsed" "$file"
	done

# Prepare the header: unless -e was given, wrap the contents of _header.html
# in <header>...</header> and insert the result after the <body> line of
# every .html file in the output tree.
if [ -z "$SKIP_HEADER" ]; then
	tmphead="$(mktemp)"
	# Writing the opening tag explicitly, instead of prepending it to line 1
	# with sed, keeps the element well formed even when _header.html is empty.
	{
		printf '<header>'
		cat "$headerfile"
		echo '</header>'
	} > "$tmphead"
	find "$destdir" -name "*.html" |
		while IFS= read -r file; do
			sed "/<body>/r $tmphead" "$file" > "$tmpsed"
			mv "$tmpsed" "$file"
		done
	rm -f "$tmphead"
fi

# Prepare the footer: unless -f was given, wrap the contents of _footer.html
# in <footer>...</footer> and insert the result before the </body> line of
# every .html file in the output tree.
if [ -z "$SKIP_FOOTER" ]; then
	tmpfoot="$(mktemp)"
	# Writing the opening tag explicitly, instead of prepending it to line 1
	# with sed, keeps the element well formed even when _footer.html is empty.
	{
		printf '<footer>'
		cat "$footerfile"
		echo '</footer>'
	} > "$tmpfoot"
	find "$destdir" -name "*.html" |
		while IFS= read -r file; do
			# Put a marker line in front of </body>, read the footer in
			# after the marker, then blank the marker.
			sed '/<\/body>/i REPLACE' "$file" > "$tmpsed"
			mv "$tmpsed" "$file"
			sed "/^REPLACE/r $tmpfoot" "$file" > "$tmpsed"
			mv "$tmpsed" "$file"
			sed 's/^REPLACE//' "$file" > "$tmpsed"
			mv "$tmpsed" "$file"
		done
	rm -f "$tmpfoot"
fi

# write_sitemap LINKFILE BASEURL
#
# Print a sitemap on stdout with one <url> entry per line of LINKFILE, in
# file order. Each line is a path relative to the site root ("/page.html");
# BASEURL is put in front of it because <loc> has to hold a full URL.
write_sitemap() {
	printf '%s\n' '<?xml version="1.0" encoding="UTF-8"?>'
	printf '%s\n' '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
	while IFS= read -r line; do
		printf '\t<url><loc>%s</loc></url>\n' "$2$line"
	done < "$1"
	printf '%s\n' '</urlset>'
}

# Generate sitemap, unless -s was given. The file is written in one go and
# replaces any sitemap.xml copied over from the source tree.
if [ -z "$SKIP_SITEMAP" ]; then
	write_sitemap "$linklist" "$url" > "$destdir"/sitemap.xml
fi

# Generate atom feed, unless -r was given. The file is written in one go
# and replaces any atom.xml copied over from the source tree.
if [ -z "$SKIP_FEED" ]; then
	feed="$destdir"/atom.xml
	{
		printf '<?xml version="1.0" encoding="utf-8"?>\n'
		printf '<feed xmlns="http://www.w3.org/2005/Atom">\n'
		printf '\t<title>%s</title>\n' "$blog_title"
		printf '\t<subtitle>%s</subtitle>\n' "$blog_desc"
		# The feed is a file, so its own address carries no trailing slash.
		printf '\t<link href="%s/atom.xml" rel="self" />\n' "$url"
		printf '\t<link href="%s/" />\n' "$url"
		printf '\t<id>%s</id>\n' "$url"
		printf '\t<updated>%s</updated>\n' "$(date -Is)"

		# linklist, objlist and objdate describe the same pages line by
		# line, so read them in lockstep.
		count='0'
		while IFS= read -r line <&3 &&
			IFS= read -r tfile <&4 &&
			IFS= read -r ldate <&5; do
			count=$((count + 1))
			# -m: stop after blog_nmax entries, 0 means no limit.
			if [ "$blog_nmax" -gt '0' ] && [ "$count" -gt "$blog_nmax" ]; then
				break
			fi
			# Use the page's <title>, falling back to its file name.
			title="$(sed -E -n 's/.*<title>\s*(.+)\s*<\/title>.*/\1/p' "$tfile")"
			if [ -z "$title" ]; then
				title="${tfile##*/}"
			fi
			# The recorded timestamp is "date time zone" separated by
			# spaces; reformat it like the feed-level <updated> above.
			updated="$(date -Is -d "$ldate")"
			printf '\t<entry>\n'
			printf '\t\t<title>%s</title>\n' "$title"
			# Links in linklist are relative to the site root; entry link
			# and id are made absolute with the site URL.
			printf '\t\t<link href="%s" />\n' "$url$line"
			printf '\t\t<id>%s</id>\n' "$url$line"
			printf '\t\t<updated>%s</updated>\n' "$updated"
			printf '\t</entry>\n'
		done 3< "$linklist" 4< "$objlist" 5< "$objdate"

		printf '</feed>\n'
	} > "$feed"
fi

# Remove the scratch files, including the sed staging file, which the
# original cleanup left out.
rm -f "$objlist" "$linklist" "$objdate" "$tmpsed"

exit 0