#!/bin/bash
# preproc_staff.sh
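# Preprocessor for staff keys in a .md topic file.
# Usage: preproc_staff.sh TOPIC.md [BUILD_DIR]
# Replaces each staff key (e.g. fgabel) by a link labelled with the full name
# (e.g. "Fabian Gabel, M.Sc.") as shown on the staff member's web page.
# If BUILD_DIR is passed, the topic title is appended to the <key>.md file of
# each collaborator in that directory.
# Note: mat.tuhh.de uses WINDOWS-1252 character encoding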

# Process command line arguments.
#   $1 - path of the .md topic file (read further below)
#   $2 - optional build directory; when fewer than two arguments are given,
#        "build" below the current working directory is used instead
if [ "$#" -lt 2 ]; then
    RESEARCH_BUILD="$(pwd)/build"
    printf '%s\n' "No building directory was specified."
else
    RESEARCH_BUILD="$2"
fi
# Quoted (and guarded with --) so that a build path containing spaces, glob
# characters or a leading dash results in exactly one directory.
mkdir -p -- "$RESEARCH_BUILD"
# printf instead of 'echo -e': backslashes in the path are printed verbatim.
printf '%s\n' "Building into directory $RESEARCH_BUILD ..."

# Name of the topic file without its directory part; the working copy in the
# build directory carries the same name.
filename=$(basename -- "$1")
baseurl='https://www.mat.tuhh.de'
# Make the output copy. If a file of that name already exists in the build
# directory it is kept and preprocessed in place.
echo "Preprocessing $filename..."
if [ -f "$RESEARCH_BUILD/$filename" ]; then
    echo "File exists, performing preprocessing in place"
else
    # Quoted (and guarded with --) so that paths containing spaces or a
    # leading dash are passed as one source and one destination.
    cp -- "$1" "$RESEARCH_BUILD/$filename"
fi

# Extract the title of the topic by running the extract_title.sh helper
# (looked up below $RESEARCH_ROOT/bin) on the working copy in the build
# directory; whatever the helper prints becomes $title.
echo "Extracting title of $1..."
# Quoted so that a root or build path containing spaces is passed as a single
# command name and a single argument.
title=$("$RESEARCH_ROOT/bin/extract_title.sh" "$RESEARCH_BUILD/$filename")
echo "Title of Topic: $title"

# Start preprocessing.
# -- leave original untouched, only work with copy in $RESEARCH_BUILD

# Print the staff keys listed on the first "### Collaborators (MAT):" line of
# the file $1, one key per line, sorted and without duplicates.
# Uses GNU extensions (\s, the I flag, \n in the replacement).
list_staff_keys() {
    # No -r: the argument is a single file, and recursing would scan a whole
    # directory tree if the path ever resolved to a directory.
    grep -h -i -m 1 "###\s*Collaborators (MAT):" -- "$1" \
        | sed -e 's/^###\s*Collaborators (MAT):\s*//I' -e 's/\s*,\s*/\n/g' \
        | sort -u
}

# Read a staff homepage (already converted to UTF-8) on stdin and print the
# text between <h1> and </h1> of the first line containing such an element,
# with surrounding whitespace removed. Prints nothing if no line matches.
# Example input line:
#   <h1>Fabian Gabel, M. Sc.</h1><div class='staffIntro'><p>...
staff_fullname_from_html() {
    sed -n '
        /<h1>.*<\/h1>/ {
            s/^.*<h1>[[:space:]]*//
            s/[[:space:]]*<\/h1>.*$//
            p
            q
        }
    '
}

# Escape $1 so that sed treats it literally as PATTERN in 's;PATTERN;...;'.
sed_escape_pattern() {
    printf '%s' "$1" | sed -e 's/[][\.*^$;]/\\&/g'
}

# Escape $1 so that sed treats it literally as REPLACEMENT in 's;...;REPLACEMENT;'.
sed_escape_replacement() {
    printf '%s' "$1" | sed -e 's/[\&;]/\\&/g'
}

# In file $1, replace every occurrence of the staff key $2 by the Markdown
# link "[$3]($2.html)", where $3 is the full name. Key and name are escaped so
# that characters special to sed (&, ;, \, ., ...) are taken literally.
# To link to the mat.tuhh.de web page instead, the link target would be
# "$baseurl/home/<key>".
# NOTE: the key is replaced wherever it occurs in the file, also inside
# longer words.
replace_staff_key() {
    local file=$1 key=$2 fullname=$3
    local pattern link_text link_target
    pattern=$(sed_escape_pattern "$key")
    link_text=$(sed_escape_replacement "$fullname")
    link_target=$(sed_escape_replacement "$key")
    sed -i -e "s;${pattern};[${link_text}](${link_target}.html);g" -- "$file"
}

echo "Preprocessing collaborators in file $RESEARCH_BUILD/$filename ..."
namelist=$(list_staff_keys "$RESEARCH_BUILD/$filename")
# Deliberately unquoted: word splitting prints the newline-separated keys on
# a single line.
echo "Found the following keys: " $namelist
echo "Replacing collaborator keys ..."
# Deliberately unquoted: iterate over the whitespace-separated keys.
for name in $namelist; do
    # Pipeline to get the full name of the staff member from the homepage:
    # -> wget the staff homepage of $name (URL quoted: it contains '?')
    # -> convert it from WINDOWS-1252 to UTF-8
    # -> keep only the text between <h1> and </h1>
    # Streaming through a pipe avoids temporary files in the current directory.
    fullname=$(wget -qO- "$baseurl/home/$name/?homepage_id=$name" \
        | iconv -f WINDOWS-1252 -t UTF-8 \
        | staff_fullname_from_html)

    if [ -n "$fullname" ]; then
        echo "Found collaborator $fullname"
        # replace name in .md file
        replace_staff_key "$RESEARCH_BUILD/$filename" "$name" "$fullname"
    else
        # Without a name the link would have an empty label; keep the key.
        printf '%s\n' "Warning: no full name found for key $name, leaving it unchanged" >&2
    fi

    if [ "$#" -gt 1 ]; then
        # Append the topic title, surrounded by blank lines, to the staff
        # file. printf prints backslashes in the title verbatim.
        printf '\n%s\n\n' "$title" >> "$RESEARCH_BUILD/$name.md"
    fi
done

# Delete the external collaborators line if it was left empty. This does not
# depend on any key, so it runs once, also when no MAT keys were found.
sed -i -e '/###\s*Collaborators (External):\s*$/ d' -- "$RESEARCH_BUILD/$filename"

# Adapt image paths in file $1: prefix link targets that start with /img/
# with a dot, i.e. every "](/img/" becomes "](./img/".
fix_img_paths() {
    # File name quoted (and guarded with --) so that paths with spaces or a
    # leading dash are edited as one file.
    sed -i -e 's;\](/img/;\](./img/;g' -- "$1"
}
fix_img_paths "$RESEARCH_BUILD/$filename"