#!/bin/bash
#
# preprocMd.sh

# Build configuration.
#   RESEARCH_ROOT  (environment, required) root directory containing topics/*.md
#   RESEARCH_BUILD output directory that receives the preprocessed Markdown files
#   baseurl        web site the working-group / staff names are fetched from

# Abort early when RESEARCH_ROOT is unset or empty; otherwise the script would
# silently operate on /build and /topics.
: "${RESEARCH_ROOT:?RESEARCH_ROOT must point to the research root directory}"

RESEARCH_BUILD="$RESEARCH_ROOT/build"
mkdir -p -- "$RESEARCH_BUILD" || {
    echo "Cannot create build directory $RESEARCH_BUILD" >&2
    exit 1
}
baseurl='https://www.mat.tuhh.de'


# Escape text so it is matched literally inside the pattern part of a
# ';'-delimited sed 's' command (basic regular expression).
escape_sed_pattern() {
    printf '%s' "$1" | sed -e 's/[][\.*^$;]/\\&/g'
}

# Escape text so it is inserted literally by the replacement part of a
# ';'-delimited sed 's' command ('\', '&' and the delimiter are special there).
escape_sed_replacement() {
    printf '%s' "$1" | sed -e 's/[\&;]/\\&/g'
}

# Read HTML on stdin and print the text between <h1> and </h1> of the first
# line containing an <h1> tag, without leading blanks. Prints nothing when no
# such line exists.
extract_h1() {
    grep -m 1 '<h1>' \
        | sed -e 's;<h1>\s*\(.*\)</h1>.*$;\1;g' -e 's;^[ \t]*;;'
}

# Build the "## [Title](page.html)" heading that is appended to the index and
# staff/group files.
#   $1 - first line of the topic file (leading '#' marks and blanks are dropped)
#   $2 - page name without extension
topic_heading() {
    local text=$1 page=$2
    text=${text#"${text%%[!#]*}"}           # drop the leading run of '#'
    text=${text#"${text%%[![:space:]]*}"}   # drop the blanks that follow it
    printf '## [%s](%s.html)' "$text" "$page"
}

# Print the sorted, unique, comma-separated keys of the first line in a file
# that matches "### <header>" (case-insensitive), one key per line.
#   $1 - header text, used as a basic regular expression
#   $2 - file to search
list_keys() {
    local header=$1 file=$2
    grep -h -i -m 1 -- "###\s*${header}" "$file" \
        | sed -e "s/^###\s*${header}\s*//I" -e 's/\s*$//' -e 's/\s*,\s*/\n/g' \
        | sort -u
}

# Preprocess one topic file: copy it to $RESEARCH_BUILD, append its heading to
# index.md and to one <key>.md per working group / collaborator, and turn the
# keys in the copy into Markdown links.
# Globals:   RESEARCH_BUILD (read), baseurl (read)
# Arguments: $1 - path of the topic Markdown file
# Returns:   1 when the file cannot be copied, otherwise the status of the
#            final sed call
process_topic() {
    local src=$1
    local filename out first_line title wg name fullname
    local key_re key_rep name_rep
    local -a keys=()

    echo "$src"
    filename=$(basename -- "$src")
    out="$RESEARCH_BUILD/$filename"

    # make output copy
    echo "Preprocessing $filename..."
    cp -- "$src" "$out" || return 1

    # extract title of topic
    first_line=''
    IFS= read -r first_line < "$out" || true   # non-zero only means no trailing newline
    title=$(topic_heading "$first_line" "${filename%.md}")

    # append research topic to index
    echo "Creating index.md ..."
    printf '\n%s\n\n' "$title" >> "$RESEARCH_BUILD/index.md"

    # replace working group keys
    echo "Replacing working group keys..."
    # Split on blanks/newlines without glob expansion; read returns non-zero at EOF.
    IFS=$' \t\n' read -r -d '' -a keys < <(list_keys 'Working Groups:' "$out") || true
    echo "Found the following keys: " "${keys[@]}"
    for wg in "${keys[@]}"; do
        # extract full name from the working group's web page
        # NOTE(review): unlike the staff pages below, this page is not run
        # through iconv — confirm its encoding.
        fullname=$(wget -qO- "$baseurl/forschung/$wg" | extract_h1)
        if [[ -z "$fullname" ]]; then
            echo "Warning: no name found for working group '$wg', using the key" >&2
            fullname=$wg
        fi

        echo "Found working group $fullname"

        # append research to the working group's file
        printf '\n%s\n\n' "$title" >> "$RESEARCH_BUILD/$wg.md"

        key_re=$(escape_sed_pattern "$wg")
        key_rep=$(escape_sed_replacement "$wg")
        name_rep=$(escape_sed_replacement "$fullname")
        sed -i "s;### Working Groups:\(.*\)${key_re}\(.*\);### Working Groups:\1[${name_rep}](${key_rep}.html)\2;g" "$out"
        # use this line instead for linking the mat.tuhh.de webpage:
        #sed -i "s;### Working Groups:\(.*\)${key_re}\(.*\);### Working Groups:\1[${name_rep}]($baseurl/forschung/${key_rep})\2;g" "$out"
    done

    echo "Replacing collaborator keys..."
    IFS=$' \t\n' read -r -d '' -a keys < <(list_keys 'Collaborators (MAT):' "$out") || true
    echo "Found the following keys: " "${keys[@]}"
    for name in "${keys[@]}"; do
        # Fetch the staff homepage of $name, convert it from WINDOWS-1252 to
        # UTF-8 and take the text of its <h1> line, e.g.
        #   <h1>Fabian Gabel, M. Sc.</h1><div class='staffIntro'>...
        # The page is piped straight through, so no page.html/utf.html files
        # are left in the working directory.
        fullname=$(wget -qO- "$baseurl/home/$name/?homepage_id=$name" \
            | iconv -f WINDOWS-1252 -t UTF-8 \
            | extract_h1)
        if [[ -z "$fullname" ]]; then
            echo "Warning: no name found for collaborator '$name', using the key" >&2
            fullname=$name
        fi

        echo "Found collaborator $fullname"

        # replace name in .md file
        # NOTE(review): this replaces the key everywhere in the file, not only
        # on the "Collaborators (MAT)" line — confirm that this is intended.
        key_re=$(escape_sed_pattern "$name")
        key_rep=$(escape_sed_replacement "$name")
        name_rep=$(escape_sed_replacement "$fullname")
        sed -i "s;${key_re};[${name_rep}](${key_rep}.html);g" "$out"
        # use this line instead for linking the mat.tuhh.de webpage:
        #sed -i "s;${key_re};[${name_rep}]($baseurl/home/${key_rep});g" "$out"

        # append research to the staff member's file
        printf '\n%s\n\n' "$title" >> "$RESEARCH_BUILD/$name.md"
    done

    # adapt img path (prefix a dot)
    sed -i 's;\](/img/;\](./img/;g' "$out"
}

for f in "$RESEARCH_ROOT"/topics/*.md
do
    # an unmatched glob stays literal; skip it instead of processing a bogus name
    [[ -e "$f" ]] || continue
    process_topic "$f"
done