The URI of TUHH Docker Registry changed from "docker.rz.tu-harburg.de:5000" to "docker.rz.tu-harburg.de". Please update your gitlab-ci.yml files if you use images from this registry.

Commit 66e10fe8 authored by Gerrit Erichsen's avatar Gerrit Erichsen

Some comments and more concise naming. nothing of importance done here

parent 02d796cd
......@@ -3,7 +3,17 @@
Created on Thu Aug 29 08:00:58 2019
@author: Gerrit Erichsen
@brief:
@brief: script to evaluate weather data published at
@details: This script is meant to make a somewhat concise evaluation of the
quality of the data obtained from the DWD's COSMO model and published
in an easy access format under <link>.
Therefore the data is compared to the data of DWD's weather
observation stations taken from DWD's climate data center (CDC).
The grid cell with its center coordinates closest to the coordinates
of the observation station is used to make this comparison.
@remarks: This is neither well-coded, well-commented, nor pretty, as it was
hastily implemented. You're welcome to make changes as you like.
@license: BSD license
"""
from station import WeatherStation
......@@ -316,119 +326,122 @@ if __name__ == '__main__':
annotation = 'n: %i' % (len(rmse))
fig1= plt.figure(figsize=(widthInInch, heightInInch))
plt1 = fig1.add_subplot(131)
plt2 = fig1.add_subplot(132)
plt3 = fig1.add_subplot(133)
plt1.boxplot(rmse, labels=xLabelBox1)
plt1.set_title("RMSE")
# plotting the statistics ------------------------------------------
figStat= plt.figure(figsize=(widthInInch, heightInInch))
plt_stats1 = figStat.add_subplot(131)
plt_stat2 = figStat.add_subplot(132)
plt_stat3 = figStat.add_subplot(133)
plt_stats1.boxplot(rmse, labels=xLabelBox1)
plt_stats1.set_title("RMSE")
if dataType == 'temperature':
plt1.set_ylabel("K")
plt_stats1.set_ylabel("K")
elif dataType == 'solar':
plt1.set_ylabel('W / m$^2$')
plt_stats1.set_ylabel('W / m$^2$')
else:
plt1.set_ylabel('m / s')
plt1.annotate(annotation, (0.6, 0.95 * np.max(rmse)), fontsize=8)
plt2.boxplot(mae, labels=xLabelBox1)
plt2.set_title("MAE")
plt3.boxplot(mbe, labels=xLabelBox1)
plt3.set_title("MBE")
plt_stats1.annotate(annotation, (0.6, 0.95 * np.max(rmse)), \
fontsize=8)
plt_stat2.boxplot(mae, labels=xLabelBox1)
plt_stat2.set_title("MAE")
plt_stat3.boxplot(mbe, labels=xLabelBox1)
plt_stat3.set_title("MBE")
figFileName = pathFigs + year + '_' + dataType + '_stats.png'
fig1.subplots_adjust(wspace=0.3, hspace=0.3)
fig1.savefig(figFileName, dpi=600, format='png')
figStat.subplots_adjust(wspace=0.3, hspace=0.3)
figStat.savefig(figFileName, dpi=600, format='png')
plt.close()
plt.clf()
if len(stations2) > 0:
fig2= plt.figure(figsize=(widthInInch, heightInInch))
plt1 = fig2.add_subplot(131)
plt2 = fig2.add_subplot(132)
plt3 = fig2.add_subplot(133)
plt1.boxplot(rmse2, labels=xLabelBox2)
plt1.set_title("RMSE")
figStat2= plt.figure(figsize=(widthInInch, heightInInch))
pltStat4 = figStat2.add_subplot(131)
pltStat5 = figStat2.add_subplot(132)
pltStat6 = figStat2.add_subplot(133)
pltStat4.boxplot(rmse2, labels=xLabelBox2)
pltStat4.set_title("RMSE")
if dataType == 'temperature':
plt1.set_ylabel("K")
pltStat4.set_ylabel("K")
elif dataType == 'solar':
plt1.set_ylabel('W / m$^2$')
pltStat4.set_ylabel('W / m$^2$')
else:
plt1.set_ylabel('Deg')
plt1.annotate(annotation, (0.6, 0.95 * np.max(rmse2)),
fontsize=8)
plt2.boxplot(mae2, labels=xLabelBox2)
plt2.set_title("MAE")
plt3.boxplot(mbe2, labels=xLabelBox2)
plt3.set_title("MBE")
pltStat4.set_ylabel('Deg')
pltStat4.annotate(annotation, (0.6, 0.95 * np.max(rmse2)),
fontsize=8)
pltStat5.boxplot(mae2, labels=xLabelBox2)
pltStat5.set_title("MAE")
pltStat6.boxplot(mbe2, labels=xLabelBox2)
pltStat6.set_title("MBE")
figFileName = pathFigs + year + '_' + dataType + '_stats2.png'
fig2.subplots_adjust(wspace=0.3, hspace=0.3)
fig2.savefig(figFileName, dpi=600, format='png')
figStat2.subplots_adjust(wspace=0.3, hspace=0.3)
figStat2.savefig(figFileName, dpi=600, format='png')
plt.close()
plt.clf()
#finding out, who did worst/best -----------------------------------
worstRMSE, worstMAE, worstMBE = findWorstStation(stations)
bestRMSE, bestMAE, bestMBE = findBestStation(stations)
#plotting worst ------------------------------------------------
#plotting worst ----------------------------------------------------
figFileName = pathFigs + year + '_' + dataType + '_worst.png'
heightInInch2 = heightInInch
if dataType != 'temperature':
heightInInch2 *= 2.
fig3 = plt.figure(figsize=(widthInInch, heightInInch2))
figScatterW = plt.figure(figsize=(widthInInch, heightInInch2))
if dataType != 'temperature':
plt1 = fig3.add_subplot(231)
plt2 = fig3.add_subplot(232)
plt3 = fig3.add_subplot(233)
plt4 = fig3.add_subplot(234)
plt5 = fig3.add_subplot(235)
plt6 = fig3.add_subplot(236)
pltSctW1 = figScatterW.add_subplot(231)
pltSctW2 = figScatterW.add_subplot(232)
pltSctW3 = figScatterW.add_subplot(233)
pltSctW4 = figScatterW.add_subplot(234)
pltSctW5 = figScatterW.add_subplot(235)
pltSctW6 = figScatterW.add_subplot(236)
else:
plt1 = fig3.add_subplot(131)
plt2 = fig3.add_subplot(132)
plt3 = fig3.add_subplot(133)
pltSctW1 = figScatterW.add_subplot(131)
pltSctW2 = figScatterW.add_subplot(132)
pltSctW3 = figScatterW.add_subplot(133)
stations[worstRMSE].createPlots(plt1, True)
stations[worstMAE].createPlots(plt2)
stations[worstMBE].createPlots(plt3)
stations[worstRMSE].createPlots(pltSctW1, True)
stations[worstMAE].createPlots(pltSctW2)
stations[worstMBE].createPlots(pltSctW3)
if len(stations2) > worstRMSE:
stations2[worstRMSE].createPlots(plt4, True)
stations2[worstRMSE].createPlots(pltSctW4, True)
if len(stations2) > worstMAE:
stations2[worstMAE].createPlots(plt5)
stations2[worstMAE].createPlots(pltSctW5)
if len(stations2) > worstMBE:
stations2[worstMBE].createPlots(plt6)
fig3.subplots_adjust(wspace=0.3, hspace=0.3)
fig3.savefig(figFileName, dpi=600, format='png')
stations2[worstMBE].createPlots(pltSctW6)
figScatterW.subplots_adjust(wspace=0.3, hspace=0.3)
figScatterW.savefig(figFileName, dpi=600, format='png')
plt.close()
plt.clf()
# plotting best -----------------------------------------------
# plotting best ----------------------------------------------------
figFileName = pathFigs + year + '_' + dataType + '_best.png'
heightInInch2 = heightInInch
if dataType != 'temperature':
heightInInch2 *= 2.
fig4 = plt.figure(figsize=(widthInInch, heightInInch2))
figScatterB = plt.figure(figsize=(widthInInch, heightInInch2))
if dataType != 'temperature':
plt1 = fig4.add_subplot(231)
plt2 = fig4.add_subplot(232)
plt3 = fig4.add_subplot(233)
plt4 = fig4.add_subplot(234)
plt5 = fig4.add_subplot(235)
plt6 = fig4.add_subplot(236)
pltSctB1 = figScatterB.add_subplot(231)
pltSctB2 = figScatterB.add_subplot(232)
pltSctB3 = figScatterB.add_subplot(233)
pltSctB4 = figScatterB.add_subplot(234)
pltSctB5 = figScatterB.add_subplot(235)
pltSctB6 = figScatterB.add_subplot(236)
else:
plt1 = fig4.add_subplot(131)
plt2 = fig4.add_subplot(132)
plt3 = fig4.add_subplot(133)
stations[bestRMSE].createPlots(plt1)
stations[bestMAE].createPlots(plt2)
stations[bestMBE].createPlots(plt3)
pltSctB1 = fig4.add_subplot(131)
pltSctB2 = fig4.add_subplot(132)
pltSctB3 = fig4.add_subplot(133)
stations[bestRMSE].createPlots(pltSctB1)
stations[bestMAE].createPlots(pltSctB2)
stations[bestMBE].createPlots(pltSctB3)
if len(stations2) > bestRMSE:
stations2[bestRMSE].createPlots(plt4)
stations2[bestRMSE].createPlots(pltSctB4)
if len(stations2) > bestMAE:
stations2[bestMAE].createPlots(plt5)
stations2[bestMAE].createPlots(pltSctB5)
if len(stations2) > bestMBE:
stations2[bestMBE].createPlots(plt6)
fig4.subplots_adjust(wspace=0.3, hspace=0.3)
fig4.savefig(figFileName, dpi=600, format='png')
stations2[bestMBE].createPlots(pltSctB6)
figScatterB.subplots_adjust(wspace=0.3, hspace=0.3)
figScatterB.savefig(figFileName, dpi=600, format='png')
plt.close()
plt.clf()
#- year's statistics -------------------------------------------------
#- year's statistics ---------------------------------------------------
if createMap:
figFileName = pathFigs + year + '_' + dataType + '_map.png'
if dataType == 'temperature':
......@@ -490,4 +503,4 @@ if __name__ == '__main__':
fig.colorbar(image, cax=cax, orientation='vertical')
plt.savefig(figFileName, dpi=600, format='png')
print(totalMean)
print('Finished')
\ No newline at end of file
print('Finished')
......@@ -3,6 +3,16 @@
Created on Thu Aug 29 10:35:29 2019
@author: Gerrit Erichsen
@brief: class to evaluate weather data
@details: This class is meant to be compact data holder and method collection
for the analysis done in evaluate.py.
It holds both the data of climate data center and the model data. from
the model data, the data at the cell assigned. For qulaity measures
the rmse, mae, and mbe are implemented. As the evaluation somewhat
depends on the type, this distinction is made as well.
@remarks: This is neither well-coded, well-commented, nor pretty, as it was
hastily implemented. You're welcome to make changes as you like.
@license: BSD license
"""
import h5py
......@@ -11,105 +21,121 @@ import numpy as np
import matplotlib.pyplot as plt
class WeatherStation:
m_name = ""
m_id = ""
m_lat = ""
m_lon = ""
m_latId = 0
m_lonId = 0
m_measuredData = []
m_modelData = []
m_minutes = []
m_mae = 0.
m_mbe = 0.
m_rmse = 0.
m_obsMax = 0.
m_obsMin = 0.
m_obsAvg = 0.
m_error = 0.2
m_type = 'temperature'
# member variables ---------------------
name = ""
identifier = ""
latitude = ""
longitude = ""
latitudeId = 0
longitudeId = 0
observationData = []
modelData = []
minuteIndicator = []
mae = 0.
mbe = 0.
rmse = 0.
obsMax = 0.
obsMin = 0.
obsAvg = 0.
error = 0.2
type = 'temperature'
# constructor --------------------------------------------------------------
def __init__(self, name, identifier, lat, lon, dataType = 'temperature'):
self.m_name = name
self.m_id = identifier
self.m_lat = lat
self.m_lon = lon
self.m_latId = -1
self.m_lonId = -1
self.m_measuredData = []
self.m_modelData = []
self.m_minutes = []
self.m_mae = 0.
self.m_mbe = 0.
self.m_rmse = 0.
self.m_obsMax = 0.
self.m_obsMin = 0.
self.m_obsAvg = 0.
self.m_error = 0.2
self.m_type = dataType
self.name = name
self.identifier = identifier
self.latitude = lat
self.longitude = lon
self.latitudeId = -1
self.longitudeId = -1
self.observationData = []
self.modelData = []
self.minuteIndicator = []
self.mae = 0.
self.mbe = 0.
self.rmse = 0.
self.obsMax = 0.
self.obsMin = 0.
self.obsAvg = 0.
self.error = 0.2
self.type = dataType
# some simple get functions to access members (out of habit, not needed) ---
def getName(self):
return self.m_name
return self.name
def getIdentifier(self):
return self.m_id
return self.identifier
def getLatitude(self):
return float(self.m_lat)
return float(self.latitude)
def getLongitude(self):
return float(self.m_lon)
return float(self.longitude)
def getLatId(self):
return self.m_latId
return self.latitudeId
def getLonId(self):
return self.m_lonId
return self.longitudeId
def getMeasuredData(self):
return self.m_measuredData
return self.observationData
def getRMSE(self):
return self.m_rmse
return self.rmse
def getMAE(self):
return self.m_mae
return self.mae
def getMBE(self):
return self.m_mbe
def clear(self):
self.m_measuredData = []
self.m_modelData = []
self.m_minutes = []
return self.mbe
def getModelData(self):
data = []
for element in self.m_modelData:
for element in self.modelData:
data.append(element)
return data
def getModelDataLen(self):
return len(self.m_modelData)
return len(self.modelData)
def getModelDataAtIndex(self, index):
if index >= 0 and index < len(self.m_modelData):
return self.m_modelData[index]
if index >= 0 and index < len(self.modelData):
return self.modelData[index]
return 0.
# starting to caluclate things
def getDifferences(self):
""""(void) -> ([float])
function calculates the differences between observed and model data.
afterwards returns the resulting array. this is needed for the statistical
analysis."""
differences = []
totalLength = len(self.m_measuredData)
if (len(self.m_modelData) < totalLength):
totalLength = len(self.m_modelData)
totalLength = len(self.observationData)
if (len(self.modelData) < totalLength):
totalLength = len(self.modelData)
for i in range(0, totalLength):
if (self.m_measuredData[i] > -500. and
self.m_measuredData[i] < 2000.):
value = self.m_measuredData[i] - self.m_modelData[i]
if (self.m_type == 'windDirection'):
if (self.observationData[i] > -500. and
self.observationData[i] < 2000.):
value = self.observationData[i] - self.modelData[i]
if (self.type == 'windDirection'):
if value > 180. :
value -= 360.
elif value < -180. :
value += 360.
elif self.m_type == 'solar' \
or self.m_type == 'diffuseIrridiation' \
or self.m_type == 'globalIrridiation':
elif self.type == 'solar' \
or self.type == 'diffuseIrridiation' \
or self.type == 'globalIrridiation':
# observation data is given in j/cm^2 and model data in
# W/m^2, both at hourly resolution. the hard coded values
# correspond to conversion of theses too
# i.e. 10,000 cm^2 == m^2 and 3600 J == 1 Wh
joulePerCm2ToWhPerM2 = 1. * 10000. / 3600.
observed = self.m_measuredData[i] * joulePerCm2ToWhPerM2
observed = self.observationData[i] * joulePerCm2ToWhPerM2
modeled = 0.
# the solar CDC data is given in true local time, which
# results in odd numbers in a normal time format (UTC).
# therefore the values of CDC need to be interpolated
# for "normal" time format
if i > 0 :
modeled = self.m_modelData[i] * self.m_minutes[i] / 60.\
+ self.m_modelData[i - 1] \
* (60 - self.m_minutes[i - 1]) / 60.
modeled = self.modelData[i] * self.minuteIndicator[i] \
/ 60.\
+ self.modelData[i - 1] \
* (60 - self.minuteIndicator[i - 1]) / 60.
value = observed - modeled
differences.append(value)
return differences;
# translating heights, as it is necessary with the wind data
def getHeightIndices(height):
""""(float) -> (str, str)
function returns indices of COSMO-DE's main planes, that lie
......@@ -149,6 +175,7 @@ class WeatherStation:
elif height >= 345.53:
lower = '44'
return lower, upper
def getHeightFromIndex(index):
""""(str) -> (float)
function returns height of COSMO-DE's main plane with index index"""
......@@ -166,17 +193,21 @@ class WeatherStation:
return 258.21
else:
return 345.53
# clearing data arrays -----------------------------------------------------
def clear(self):
self.observationData = []
self.modelData = []
self.minuteIndicator = []
def setLatLonIds(self, indexLat, indexLon):
self.m_latId = indexLat
self.m_lonId = indexLon
self.latitudeId = indexLat
self.longitudeId = indexLon
def setModelData(self, data):
if len(data) > 0:
self.m_modelData = []
self.m_modelData = data
self.modelData = []
self.modelData = data
def setType(self, dataType):
self.m_type = dataType
self.type = dataType
def readInCdcFile(self, fileName, targetYear, column):
self.m_measureData = [] #clear history -> expercience shows to be necessary
......@@ -188,144 +219,169 @@ class WeatherStation:
for line in content:
entries = line.split(';')
if entries[1].startswith(targetYear):
if self.m_type == 'solar'\
or self.m_type == 'diffuseIrridiation' \
or self.m_type == 'globalIrridiation':
if self.type == 'solar'\
or self.type == 'diffuseIrridiation' \
or self.type == 'globalIrridiation':
hour = int(entries[1][-5:-3])
self.m_minutes.append(int(entries[1][-2:]))
self.minuteIndicator.append(int(entries[1][-2:]))
else:
hour = int(entries[1][-2:])
if hour > lastHour + 1:
i = 0
diff = hour - lastHour - 1
while i < diff:
self.m_measuredData.append(-999.)
self.observationData.append(-999.)
i += 1
elif hour < lastHour and not(hour == 0 and lastHour == 23):
#assuming that there is never an entire day missing
i = 0
diff = 24 - lastHour + hour - 1
while i < diff:
self.m_measuredData.append(-999.)
self.observationData.append(-999.)
i += 1
self.m_measuredData.append(float(entries[column]))
if self.m_type == 'windDirection' and counter == 0:
print(self.m_type, column, entries[column], self.m_measuredData)
self.observationData.append(float(entries[column]))
if self.type == 'windDirection' and counter == 0:
print(self.type, column, entries[column], self.observationData)
lastHour = hour
counter += 1
if len(self.m_measuredData) < 8760:
print(self.m_name, 'only has', len(self.m_measuredData))
self.m_measuredData = []
if len(self.observationData) < 8760:
print(self.name, 'only has', len(self.observationData))
self.observationData = []
def readInModelFile(self, h5FileName, dataType, h5FileName2 = '', \
height = 10.):
self.m_modelData = []
if self.m_name == 'Brocken':
self.modelData = []
if self.name == 'Brocken':
print('Brocken, alter', h5FileName, h5FileName2, height)
if h5FileName2 == '':
with h5py.File(h5FileName, 'r') as h5File:
self.m_modelData =h5File[dataType][:,self.m_latId,self.m_lonId]
self.modelData =h5File[dataType][:,self.latitudeId,self.longitudeId]
else:
data1 = []
data2 = []
height1 = WeatherStation.getHeightFromIndex(h5FileName[-5:-3])
height2 = WeatherStation.getHeightFromIndex(h5FileName2[-5:-3])
with h5py.File(h5FileName, 'r') as h5File1:
data1 = h5File1[dataType][:,self.m_latId,self.m_lonId]
data1 = h5File1[dataType][:,self.latitudeId,self.longitudeId]
with h5py.File(h5FileName2, 'r') as h5File2:
data2 = h5File2[dataType][:,self.m_latId,self.m_lonId]
self.m_modelData = []
data2 = h5File2[dataType][:,self.latitudeId,self.longitudeId]
self.modelData = []
for lower, upper in zip(data1, data2):
self.m_modelData.append(np.interp(height, [height1, height2], \
self.modelData.append(np.interp(height, [height1, height2], \
[lower, upper]))
# calculating all the statistics needed ------------------------------------
def calculateStats(self):
""""(void) -> (void)
kicks of calculation process. call-stack is:
-getDifferences()
-calculateMAE() | calculateMBE() | calculateRMSE()
afterwards operations on solar data and some min/max analysis"""
differences = self.getDifferences();
self.m_mae = WeatherStation.calculateMAE(differences);
self.m_mbe = WeatherStation.calculateMBE(differences);
self.m_rmse = WeatherStation.calculateRMSE(differences);
self.mae = WeatherStation.calculateMAE(differences);
self.mbe = WeatherStation.calculateMBE(differences);
self.rmse = WeatherStation.calculateRMSE(differences);
values = []
for element in self.m_measuredData:
for element in self.observationData:
if (element > -500. and element < 2000.):
values.append(element);
if self.m_type == 'solar' \
or self.m_type == 'diffuseIrridiation' \
or self.m_type == 'globalIrridiation':
if self.type == 'solar' \
or self.type == 'diffuseIrridiation' \
or self.type == 'globalIrridiation':
joulePerCm2ToWhPerM2 = 1. * 10000. / 3600.
values[-1] *= joulePerCm2ToWhPerM2
valueArray = np.array(values);
self.m_obsMin = valueArray.min()
self.m_obsMax = valueArray.max()
self.m_obsAvg = valueArray.mean()
self.obsMin = valueArray.min()
self.obsMax = valueArray.max()
self.obsAvg = valueArray.mean()
def calculateMAE(differences):
""""([float]) -> (np.array)
function returns mean of absolutes of float array"""
return abs(np.array(differences)).mean()
def calculateMBE(differences):
""""([float]) -> (np.array)
function returns mean of float array"""
return np.array(differences).mean()
def calculateRMSE(differences):
""""([float]) -> (np.array)
function returns mean of square-roots of squares of float array"""
return math.sqrt(np.square(np.array(differences)).mean())
# making plots -------------------------------------------------------------
def createPlots(self, fig, printAxis = False):
""""(matplotlib.axes, bool) -> (void)
creates a scatter plot of obersvation data and model data (x and y,
repsectively). the scatter plot is added to the matplotlib axes given
at function call.
@remark: this re-uses some code of the differences() function. may be
this can be done less verbose and more well structed"""
differences = self.getDifferences();
valuesObs = []
valuesObsBounds = []
valuesMod = []
xAxis = []
i = 0
total = len(self.m_measuredData)
if (len(self.m_modelData) < total): total = len(self.m_modelData)
if self.m_type == 'solar' \
or self.m_type == 'diffuseIrridiation' \
or self.m_type == 'globalIrridiation':
total = len(self.observationData)
if (len(self.modelData) < total): total = len(self.modelData)
if self.type == 'solar' \
or self.type == 'diffuseIrridiation' \
or self.type == 'globalIrridiation':
for i in range(0, total):
if (self.m_measuredData[i] > -500. \
and self.m_measuredData[i] < 2000.):
valuesObs.append(self.m_measuredData[i])
valuesObsBounds.append(self.m_error)
valueMod = self.m_modelData[i]
if self.m_type == 'solar' \
or self.m_type == 'diffuseIrridiation' \
or self.m_type == 'globalIrridiation':
if (self.observationData[i] > -500. \
and self.observationData[i] < 2000.):