#!/bin/bash
# get_static.sh
#
# Usage: get_static.sh TOPIC_FILE [BUILD_DIR]
#
# Reads the "### Collaborators (MAT):" and "### Working Groups:" lines of a
# .md topic file and, for every comma-separated key listed there, downloads
# the matching page from mat.tuhh.de into BUILD_DIR, converted to UTF-8:
#
#   BUILD_DIR/home/<key>.html       staff homepage of <key>
#   BUILD_DIR/forschung/<key>.html  page of working group <key>
#
# The raw (unconverted) download is kept next to each page as <key>.tmp.
# BUILD_DIR defaults to ./static. The topic file itself is never modified.
#
# The staff homepage carries the full name in its <h1> tag, e.g.
#   <h1>Fabian Gabel, M. Sc.</h1><div class='staffIntro'>...
# so a key such as "fgabel" can be resolved to "Fabian Gabel, M. Sc." from the
# downloaded page. That extraction is NOT performed by this script.
#
# Note: mat.tuhh.de uses WINDOWS-1252 character encoding.
#
# No `set -e` on purpose: a failed download for one key must not prevent the
# remaining keys from being fetched. Failures are reported on stderr instead.

readonly BASE_URL='https://www.mat.tuhh.de'

# Print a warning to stderr.
warn() {
  printf 'Warning: %s\n' "$*" >&2
}

# Print an error to stderr and abort.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

#######################################
# List the keys found on the first "### <heading>:" line of the topic file.
# Arguments: $1 - heading text without the leading "###" and trailing ":"
#            $2 - topic file
# Outputs:   one key per line on stdout, sorted and de-duplicated; nothing
#            if the heading is absent
#######################################
extract_keys() {
  local heading=$1 topic=$2
  # \s and the sed "I" flag are GNU extensions, as in the original pipeline.
  # Keys are split on commas and on whitespace, matching the word splitting
  # the previous unquoted `for` loop performed (but without glob expansion).
  grep -h -i -m 1 -r -- "###\s*${heading}:" "$topic" \
    | sed -e "s/^###\s*${heading}:\s*//I" -e 's/\s*,\s*/\n/g' \
    | tr -s '[:space:]' '\n' \
    | sed -e '/^$/d' \
    | sort -u
}

#######################################
# Tell whether a key can safely be used as a single file name component.
# Arguments: $1 - key
# Returns:   0 if usable, 1 otherwise
#######################################
is_safe_key() {
  case "$1" in
    '' | . | .. | */*) return 1 ;;
    *) return 0 ;;
  esac
}

#######################################
# Download a page and store a UTF-8 copy of it.
# Arguments: $1 - URL to fetch
#            $2 - destination path without extension; writes $2.tmp (raw
#                 download) and $2.html (UTF-8 conversion)
#            $3 - character encoding of the downloaded page
# Outputs:   warnings on stderr if the download or the conversion fails
#######################################
fetch_page() {
  local url=$1 dest=$2 encoding=$3
  if ! wget -qO- "$url" > "${dest}.tmp"; then
    warn "download of $url failed"
  fi
  # Still run the conversion after a failed download so the same set of
  # output files exists as before (an empty page in that case).
  if ! iconv -f "$encoding" -t UTF-8 "${dest}.tmp" > "${dest}.html"; then
    warn "conversion of ${dest}.tmp from $encoding to UTF-8 failed"
  fi
}

main() {
  if [[ $# -lt 1 || -z "$1" ]]; then
    printf 'Usage: %s TOPIC_FILE [BUILD_DIR]\n' "${0##*/}" >&2
    exit 2
  fi
  local topic=$1
  # Without this check a bad path would make grep fail silently and the
  # script would report an empty key list.
  [[ -e "$topic" ]] || die "topic file '$topic' does not exist"

  # processing command line arguments
  if [[ $# -lt 2 || -z "$2" ]]; then
    RESEARCH_BUILD="$(pwd)/static"
    printf '%s\n' "No building directory was specified."
  else
    RESEARCH_BUILD=$2
  fi
  mkdir -p -- "$RESEARCH_BUILD" || die "cannot create directory '$RESEARCH_BUILD'"
  printf '%s\n' "Building into directory $RESEARCH_BUILD ..."

  local filename
  filename=$(basename -- "$topic")

  local -a keys
  local key

  # --- staff homepages -----------------------------------------------------
  # -- leave original untouched, only work inside $RESEARCH_BUILD
  mkdir -p -- "$RESEARCH_BUILD/home" || die "cannot create directory '$RESEARCH_BUILD/home'"
  printf '%s\n' "Preprocessing collaborators in file $RESEARCH_BUILD/$filename ..."

  keys=()
  while IFS= read -r key; do
    keys+=("$key")
  done < <(extract_keys 'Collaborators (MAT)' "$topic")
  printf '%s\n' "Found the following keys:  ${keys[*]}"

  printf '%s\n' "Replacing collaborator keys ..."
  for key in "${keys[@]}"; do
    printf '%s\n' "$key"
    if ! is_safe_key "$key"; then
      warn "skipping collaborator key '$key': not usable as a file name"
      continue
    fi
    # The URL is quoted: it contains '?', which the shell would otherwise
    # treat as a glob character.
    fetch_page "$BASE_URL/home/$key/?homepage_id=$key" \
      "$RESEARCH_BUILD/home/$key" WINDOWS-1252
  done

  # --- working group pages -------------------------------------------------
  mkdir -p -- "$RESEARCH_BUILD/forschung" || die "cannot create directory '$RESEARCH_BUILD/forschung'"
  printf '%s\n' "Replacing working group keys..."

  keys=()
  while IFS= read -r key; do
    keys+=("$key")
  done < <(extract_keys 'Working Groups' "$topic")
  printf '%s\n' "Found the following keys:  ${keys[*]}"

  for key in "${keys[@]}"; do
    printf '%s\n' "$key"
    if ! is_safe_key "$key"; then
      warn "skipping working group key '$key': not usable as a file name"
      continue
    fi
    # NOTE(review): these pages are converted from ISO-8859-1 while the staff
    # pages use WINDOWS-1252 (see header note) - confirm which one is right.
    fetch_page "$BASE_URL/forschung/$key" \
      "$RESEARCH_BUILD/forschung/$key" ISO-8859-1
  done
}

main "$@"