#!/bin/bash
# preprocIndexMd.sh
#
# Preprocess index.md: copy it into the build directory so that the
# working-group and collaborator keys it contains can be replaced in the copy
# by Markdown links (display names are fetched from pages below $baseurl).

# original  - input Markdown file
# filename  - name of the copy inside the build directory
# build_dir - output directory
# baseurl   - website queried for the display names
original="index.md"
filename="index.md"
build_dir="./build"
baseurl='https://www.mat.tuhh.de'

# Make the output copy. Every later substitution edits this copy in place, so
# there is nothing useful left to do if it cannot be created.
mkdir -p -- "$build_dir" || exit 1
echo "Preprocessing $filename..."
cp -- "$original" "$build_dir/$filename" || exit 1

#replace working group and collaborator keys by Markdown links
#
# Both stages fetch a page below $baseurl, take the display name from its
# <h1> heading and splice a link into $build_dir/$filename with sed. Keys whose
# lookup yields no heading are left untouched instead of being turned into
# empty links.

# Print the content of the first <h1>...</h1> element read from stdin, without
# leading blanks. Prints nothing when the input has no <h1> tag.
# Example input line:
#   <h1>Fabian Gabel, M. Sc.</h1><div class='staffIntro'>...
extract_h1() {
    grep -m 1 '<h1>' \
        | sed -e 's;<h1>\s*\(.*\)</h1>.*$;\1;' -e 's;^[ \t]*;;'
}

# Print $1 escaped for use as the replacement part of a sed "s;...;...;"
# command: a backslash, "&" (whole match) and the ";" delimiter are special.
sed_escape_replacement() {
    printf '%s' "$1" | sed -e 's/[\\&;]/\\&/g'
}

# Print $1 escaped for use as a literal inside the pattern part of a sed
# "s;...;...;" command (basic regular expression, ";" as delimiter).
sed_escape_pattern() {
    printf '%s' "$1" | sed -e 's/[][\\.*^$;]/\\&/g'
}

# Print the unique keys listed on the first "### Collaborators (MAT):" line of
# the file $1, one per line. Keys are separated by commas and/or whitespace.
list_collaborator_keys() {
    grep -h -i -m 1 "###\s*Collaborators (MAT):" "$1" \
        | sed -e 's/^###\s*Collaborators (MAT):\s*//I' \
        | tr -s ',[:space:]' '\n' \
        | sed -e '/^$/d' \
        | sort -u
}

# File that all substitutions below edit in place.
target="$build_dir/$filename"

echo "Replacing working group keys..."
for wg in aa cm dm nm st; do
    url="$baseurl/forschung/$wg"
    fullname=$(wget -qO- "$url" | extract_h1)
    if [[ -z "$fullname" ]]; then
        echo "No heading found for working group $wg, leaving key unchanged" >&2
        continue
    fi
    echo "Found working group $fullname"

    label=$(sed_escape_replacement "$fullname")
    href=$(sed_escape_replacement "$url")
    # \b keeps the key from matching inside a longer word, e.g. inside link
    # text that was inserted for a previously processed key.
    sed -i "s;### Working Groups:\(.*\)\b$wg\b\(.*\);### Working Groups:\1\[$label\]($href)\2;g" "$target"
done


echo "Replacing collaborator keys..."
while IFS= read -r name; do
    # pipeline to get full name of staff-member from mat-homepage
    # -> wget the staff homepage of $name (URL quoted: it contains a "?")
    # -> convert it from WINDOWS-1252 to UTF-8
    # -> keep only the text between <h1></h1>, without leading blanks
    # The page is streamed through the pipeline; no scratch files are written.
    fullname=$(wget -qO- "$baseurl/home/$name/?homepage_id=$name" \
        | iconv -f WINDOWS-1252 -t UTF-8 \
        | extract_h1)
    if [[ -z "$fullname" ]]; then
        echo "No heading found for collaborator $name, leaving key unchanged" >&2
        continue
    fi
    echo "Found collaborator $fullname"

    # replace name in index.md file
    # The key is replaced wherever it occurs as a whole word in the file.
    pattern=$(sed_escape_pattern "$name")
    label=$(sed_escape_replacement "$fullname")
    href=$(sed_escape_replacement "$name")
    sed -i "s;\b$pattern\b;\[$label\]($href.html);g" "$target"
    #sed -i "s;\b$pattern\b;\[$label\]($(sed_escape_replacement "$baseurl/home/$name"));g" "$target" #use this line instead for linking the mat.tuhh.de webpage
done < <(list_collaborator_keys "$target")