Skip to content
Snippets Groups Projects
preprocMd.sh 3 KiB
Newer Older
# preprocMd.sh -- preprocess the research-topic markdown files.
#
# For every topic file $RESEARCH_ROOT/topics/<topic>.md the script works on the
# file of the same name inside $RESEARCH_ROOT/build and
#   * appends a "## [<title>](<topic>.html)" entry to build/index.md,
#   * replaces every key on the "### Working Groups:" line by a markdown link
#     and appends the topic entry to build/<key>.md,
#   * replaces every key of the "### Collaborators (MAT):" line by a markdown
#     link (throughout the file) and appends the topic entry to build/<key>.md,
#   * rewrites image links "](/img/" to the relative form "](./img/".
#
# Required environment: RESEARCH_ROOT
# Required tools:       wget, iconv, GNU grep/sed (\s, \n in replacement, -i, I)
#
# NOTE(review): the script only reads/edits build/<topic>.md and never copies
# it from topics/ -- presumably an earlier build step does that; confirm.

: "${RESEARCH_ROOT:?RESEARCH_ROOT must be set to the research root directory}"

RESEARCH_BUILD="$RESEARCH_ROOT/build"
mkdir -p "$RESEARCH_BUILD"
baseurl='https://www.mat.tuhh.de'

# list_keys FILE HEADING
# Prints the comma separated keys that follow the first "### HEADING:" line of
# FILE (matched case-insensitively), one key per line, sorted and de-duplicated.
# Prints nothing when FILE has no such line.
list_keys() {
    grep -i -m 1 "###\s*$2:" "$1" \
        | sed -e "s/^###\s*$2:\s*//I" -e 's/\s*,\s*/\n/g' \
        | sort -u
}

# first_h1
# Reads HTML on stdin and prints the text between <h1> and </h1> of the first
# line that contains an <h1> tag, with leading blanks removed.
# Only one line is returned so the result is always safe to embed in a
# single-line sed command.
first_h1() {
    grep -m 1 '<h1>' \
        | sed -e 's;<h1>\s*\(.*\)</h1>.*$;\1;' -e 's;^[ \t]*;;'
}

# escape_replacement TEXT
# Prints TEXT with the characters that are special inside the replacement part
# of the "s;...;...;" commands used below (backslash, & and the ; delimiter)
# backslash-escaped.
escape_replacement() {
    printf '%s' "$1" | sed -e 's/[\\&;]/\\&/g'
}

for f in "$RESEARCH_ROOT"/topics/*.md
do
    # An unmatched glob is passed through literally; do not process it.
    [ -e "$f" ] || continue

    printf '%s\n' "$f"
    filename=$(basename -- "$f")
    echo "Preprocessing $filename..."

    topic="$RESEARCH_BUILD/$filename"
    if [ ! -f "$topic" ]; then
        # Every step below reads or edits the build copy; without it we would
        # only add an empty entry to the index.
        echo "Skipping $filename: $topic does not exist" >&2
        continue
    fi

    # extract title of topic: first line without its leading '#' marker,
    # turned into a link to the rendered page of the topic
    heading=$(head -n 1 "$topic" | sed -e 's;^#*\s*;;')
    title="## [$heading](${filename%.md}.html)"

    # append research topic to index
    echo "Creating index.md ..."
    printf '\n%s\n\n' "$title" >> "$RESEARCH_BUILD/index.md"

    echo "Replacing working group keys..."
    namelist=$(list_keys "$topic" 'Working Groups')
    # Word splitting is intentional: keys are whitespace-free identifiers.
    # shellcheck disable=SC2086
    echo "Found the following keys: " $namelist
    # shellcheck disable=SC2086
    for wg in $namelist
    do
        # full name of the working group = <h1> of its page on the homepage
        fullname=$(wget -qO- "$baseurl/forschung/$wg" | first_h1)
        if [ -z "$fullname" ]; then
            echo "Could not determine the name of working group $wg, using the key" >&2
            fullname=$wg
        fi
        echo "Found working group $fullname"
        printf '\n%s\n\n' "$title" >> "$RESEARCH_BUILD/$wg.md"

        label=$(escape_replacement "$fullname")
        sed -i "s;### Working Groups:\(.*\)$wg\(.*\);### Working Groups:\1[$label]($wg.html)\2;g" "$topic"
        # use this line instead for linking the mat.tuhh.de webpage
        #sed -i "s;### Working Groups:\(.*\)$wg\(.*\);### Working Groups:\1[$label]($baseurl/forschung/$wg)\2;g" "$topic"
    done

    echo "Replacing collaborator keys..."
    namelist=$(list_keys "$topic" 'Collaborators (MAT)')
    # shellcheck disable=SC2086
    echo "Found the following keys: " $namelist
    # shellcheck disable=SC2086
    for name in $namelist
    do
        # pipeline to get full name of staff-member from mat-homepage
        # -> wget the staff homepage of $name
        # -> convert it from WINDOWS-1252 to UTF-8
        # -> take the line with the <h1>-tag, something like
        #    <h1>Fabian Gabel, M. Sc.</h1><div class='staffIntro'>...
        # -> strip it such that only the portion between <h1></h1> remains
        # -> remove leading spaces
        # Everything is piped, so no scratch files are left in the working
        # directory.
        fullname=$(wget -qO- "$baseurl/home/$name/?homepage_id=$name" \
            | iconv -f WINDOWS-1252 -t UTF-8 \
            | first_h1)
        if [ -z "$fullname" ]; then
            echo "Could not determine the name of collaborator $name, using the key" >&2
            fullname=$name
        fi
        echo "Found collaborator $fullname"

        label=$(escape_replacement "$fullname")
        # replaces every occurrence of the key in the whole file
        sed -i "s;$name;[$label]($name.html);g" "$topic"
        # use this line instead for linking the mat.tuhh.de webpage
        #sed -i "s;$name;[$label]($baseurl/home/$name);g" "$topic"
        printf '\n%s\n\n' "$title" >> "$RESEARCH_BUILD/$name.md"
    done

    # adapt img path (prefix a dot)
    sed -i 's;\](/img/;\](./img/;g' "$topic"
done