The URI of TUHH Docker Registry changed from "docker.rz.tu-harburg.de:5000" to "docker.rz.tu-harburg.de". Please update your gitlab-ci.yml files if you use images from this registry.

Commit 746ae958 authored by Gerrit Erichsen's avatar Gerrit Erichsen

BugFixes: Wrong deletion behaviour fixed, by centralizing it (also index comparison was wrong)

Features: Added histograms
Generates plots now, only for different RMSE/MAE/MBE, so number of subplots is reduced.
parent 45ad43de
...@@ -18,8 +18,8 @@ Created on Thu Aug 29 08:00:58 2019 ...@@ -18,8 +18,8 @@ Created on Thu Aug 29 08:00:58 2019
from station import WeatherStation from station import WeatherStation
import dwdfilehelper as dwd import dwdfilehelper as dwd
import math
import numpy as np import numpy as np
import seaborn as sns
import matplotlib import matplotlib
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import matplotlib.cm as cm import matplotlib.cm as cm
...@@ -189,7 +189,7 @@ if __name__ == '__main__': ...@@ -189,7 +189,7 @@ if __name__ == '__main__':
ind = findStationId(stations, station[0]) ind = findStationId(stations, station[0])
if fileName == '': if fileName == '':
stations.remove(stations[ind]) stations.remove(stations[ind])
if ind >= len(stations2): if ind < len(stations2) and len(stations2) > 0:
stations2.remove(stations2[ind]) stations2.remove(stations2[ind])
print("Done", i, "/", total) print("Done", i, "/", total)
i+=1 i+=1
...@@ -203,13 +203,6 @@ if __name__ == '__main__': ...@@ -203,13 +203,6 @@ if __name__ == '__main__':
if len(stations2) > 0: if len(stations2) > 0:
stations2[ind].clear() stations2[ind].clear()
stations[ind].readInCdcFile(fileName, year, cdcColumn) stations[ind].readInCdcFile(fileName, year, cdcColumn)
if (len(stations[ind].getMeasuredData()) == 0):
stations.remove(stations[ind])
if ind >= len(stations2):
stations2.remove(stations2[ind])
print("Done", i, "/", total)
i+=1
continue
if (stations[ind].getLatId() < 0) \ if (stations[ind].getLatId() < 0) \
or (dataType == 'solar' and yearAsInt == 2015): or (dataType == 'solar' and yearAsInt == 2015):
bLat, bLon = dwd.findClosestCell(lat, lon, \ bLat, bLon = dwd.findClosestCell(lat, lon, \
...@@ -234,13 +227,6 @@ if __name__ == '__main__': ...@@ -234,13 +227,6 @@ if __name__ == '__main__':
fileNameH5l = fileNameH5 + '_' + lower + '.h5' fileNameH5l = fileNameH5 + '_' + lower + '.h5'
stations[ind].readInModelFile(fileNameH5l, dataTypeShort, \ stations[ind].readInModelFile(fileNameH5l, dataTypeShort, \
yearAsInt) yearAsInt)
if len(stations[ind].modelData) == 0:
stations.remove(stations[ind])
if ind >= len(stations2):
stations2.remove(stations2[ind])
print("Done", i, "/", total)
i+=1
continue
if dataType == 'solar' or dataType == 'wind': if dataType == 'solar' or dataType == 'wind':
secondType = '' secondType = ''
if (dataType == 'solar'): if (dataType == 'solar'):
...@@ -254,12 +240,6 @@ if __name__ == '__main__': ...@@ -254,12 +240,6 @@ if __name__ == '__main__':
station[2], station[3], \ station[2], station[3], \
secondType)) secondType))
stations2[ind].readInCdcFile(fileName, year, cdcColumn + 1) stations2[ind].readInCdcFile(fileName, year, cdcColumn + 1)
if (len(stations2[ind].observationData) == 0):
stations.remove(stations[ind])
tations2.remove(stations2[ind])
print("Done", i, "/", total)
i+=1
continue
if dataType == 'solar': if dataType == 'solar':
stations2[ind].readInModelFile(fileName2H5, \ stations2[ind].readInModelFile(fileName2H5, \
dataTypeShort2,\ dataTypeShort2,\
...@@ -282,13 +262,23 @@ if __name__ == '__main__': ...@@ -282,13 +262,23 @@ if __name__ == '__main__':
stations2[ind].readInModelFile(fileNameH5l, \ stations2[ind].readInModelFile(fileNameH5l, \
dataTypeShort2, \ dataTypeShort2, \
yearAsInt) yearAsInt)
if (len(stations2[ind].modelData) == 0): if len(stations[ind].modelData) == 0 \
or len(stations[ind].observationData) == 0:
stations.remove(stations[ind])
if ind < len(stations2) and len(stations2) > 0:
stations2.remove(stations2[ind])
print("Done", i, "/", total)
i+=1
continue
elif ind < len(stations2) and len(stations2) > 0:
if len(stations2[ind].modelData) == 0 \
or len(stations2[ind].observationData) == 0:
stations.remove(stations[ind]) stations.remove(stations[ind])
stations2.remove(stations2[ind]) stations2.remove(stations2[ind])
print("Done", i, "/", total) print("Done", i, "/", total)
i+=1 i+=1
continue continue
print("Done read", i, "/", total) print("Done read", i, "/", total)
i+=1 i+=1
...@@ -332,7 +322,7 @@ if __name__ == '__main__': ...@@ -332,7 +322,7 @@ if __name__ == '__main__':
print("Done stats1", i, "/", total) print("Done stats1", i, "/", total)
i += 1 i += 1
i = 1 i = 1
total = len(stations) total = len(stations2)
for station in stations2: for station in stations2:
station.calculateStats() station.calculateStats()
rmse2.append(station.getRMSE()) rmse2.append(station.getRMSE())
...@@ -406,40 +396,99 @@ if __name__ == '__main__': ...@@ -406,40 +396,99 @@ if __name__ == '__main__':
plt.close() plt.close()
figStat2.clf() figStat2.clf()
#finding out, who did worst/best ----------------------------------- #finding out, who did worst/best ----------------------------------
worstRMSE, worstMAE, worstMBE = findWorstStation(stations) worstRMSE, worstMAE, worstMBE = findWorstStation(stations)
bestRMSE, bestMAE, bestMBE = findBestStation(stations) bestRMSE, bestMAE, bestMBE = findBestStation(stations)
#plotting worst ---------------------------------------------------- nWorstPlots = 1
if (worstMAE != worstRMSE): nWorstPlots += 1
if (worstMBE != worstRMSE and worstMBE != worstMAE): nWorstPlots += 1
nBestPlots = 1
if (bestMAE != bestRMSE): nBestPlots += 1
if (bestMBE != bestRMSE and bestMBE != bestMAE): nBestPlots += 1
worstRMSE2 = 0
worstMAE2 = 0
worstMBE2 = 0
nWorstPlots2 = 0
bestRMSE2 = 0
bestMAE2 = 0
bestMBE2 = 0
nBestPlots2 = 0
if len(stations2) > 0:
worstRMSE2, worstMAE2, worstMBE2 = findWorstStation(stations2)
bestRMSE2, bestMAE2, bestMBE2 = findBestStation(stations2)
nWorstPlots2 = 1
if (worstMAE2 != worstRMSE2): nWorstPlots2 += 1
if (worstMBE2 != worstRMSE2 and worstMBE2 != worstMAE2): nWorstPlots2 += 1
nBestPlots2 = 1
if (bestMAE2 != bestRMSE2): nBestPlots2 += 1
if (bestMBE2 != bestRMSE2 and bestMBE2 != bestMAE2): nBestPlots2 += 1
#plotting worst ---------------------------------------------------
figFileName = pathFigs + year + '_' + dataType + '_worst.png' figFileName = pathFigs + year + '_' + dataType + '_worst.png'
heightInInch2 = heightInInch heightInInch2 = heightInInch
if dataType != 'temperature': if dataType != 'temperature':
heightInInch2 *= 2. heightInInch2 *= 2.
figScatterW = plt.figure(figsize=(widthInInch, heightInInch2)) figScatterW = plt.figure(figsize=(widthInInch, heightInInch2))
if dataType != 'temperature': if dataType != 'temperature':
pltSctW1 = figScatterW.add_subplot(231) pltIndex = 1
pltSctW2 = figScatterW.add_subplot(232) cols = max(nWorstPlots, nWorstPlots2)
pltSctW3 = figScatterW.add_subplot(233) pltSctW1 = figScatterW.add_subplot(2,cols,pltIndex)
pltSctW4 = figScatterW.add_subplot(234) pltIndex = 2
pltSctW5 = figScatterW.add_subplot(235) if worstMAE != worstRMSE:
pltSctW6 = figScatterW.add_subplot(236) pltSctW2 = figScatterW.add_subplot(2,cols,pltIndex)
pltIndex += 1
if worstMBE != worstRMSE and worstMBE != worstMAE:
pltSctW3 = figScatterW.add_subplot(2,cols,pltIndex)
pltIndex = cols + 1
pltSctW4 = figScatterW.add_subplot(2,cols,pltIndex)
pltIndex += 1
if worstMAE2 != worstRMSE2:
pltSctW5 = figScatterW.add_subplot(2,cols,pltIndex)
pltIndex += 1
if worstMBE2 != worstRMSE2 and worstMBE2 != worstMAE2:
pltSctW6 = figScatterW.add_subplot(2,cols,pltIndex)
else: else:
pltSctW1 = figScatterW.add_subplot(131) pltIndex = 1
pltSctW2 = figScatterW.add_subplot(132) cols = nWorstPlots
pltSctW3 = figScatterW.add_subplot(133) pltSctW1 = figScatterW.add_subplot(1,cols,pltIndex)
pltIndex = 2
if worstMAE != worstRMSE:
pltSctW2 = figScatterW.add_subplot(1,cols,pltIndex)
pltIndex += 1
if worstMBE != worstRMSE and worstMBE != worstMAE:
pltSctW3 = figScatterW.add_subplot(1,cols,pltIndex)
stations[worstRMSE].createPlots(pltSctW1, True) stations[worstRMSE].createPlots(pltSctW1, True)
stations[worstMAE].createPlots(pltSctW2) if worstMAE != worstRMSE:
stations[worstMBE].createPlots(pltSctW3) stations[worstMAE].createPlots(pltSctW2)
if len(stations2) > worstRMSE: if worstMBE != worstRMSE and worstMBE != worstMAE:
stations2[worstRMSE].createPlots(pltSctW4, True) stations[worstMBE].createPlots(pltSctW3)
if len(stations2) > worstMAE: if len(stations2) > 0:
stations2[worstMAE].createPlots(pltSctW5) stations2[worstRMSE2].createPlots(pltSctW4, True)
if len(stations2) > worstMBE: if len(stations2) > 0 and worstMAE2 != worstRMSE2:
stations2[worstMBE].createPlots(pltSctW6) stations2[worstMAE2].createPlots(pltSctW5)
figScatterW.subplots_adjust(wspace=0.3, hspace=0.3) if len(stations2) > 0 and worstMBE2 != worstRMSE2 \
and worstMBE2 != worstMAE2:
stations2[worstMBE2].createPlots(pltSctW6)
space = 0.3
if dataType == 'temperature': space = 0.2
figScatterW.subplots_adjust(wspace=space, hspace=space)
figScatterW.savefig(figFileName, dpi=600, format='png') figScatterW.savefig(figFileName, dpi=600, format='png')
plt.close() plt.close()
figScatterW.clf() figScatterW.clf()
# plotting worst distribution
figFileName = pathFigs + year + '_' + dataType + '_worstDist.png'
figDistW = plt.figure(figsize=(widthInInch, heightInInch2))
if dataType == 'temperature':
stations[worstRMSE].createDistPlots(figDistW.add_subplot(111))
else:
pltDistW1 = figDistW.add_subplot(2, 1, 1)
stations[worstRMSE].createDistPlots(pltDistW1)
pltDistW2 = figDistW.add_subplot(2, 1, 2)
stations2[worstRMSE2].createDistPlots(pltDistW2)
figDistW.subplots_adjust(wspace=space, hspace=space)
figDistW.savefig(figFileName, dpi=600, format='png')
plt.close()
figDistW.clf()
# plotting best ---------------------------------------------------- # plotting best ----------------------------------------------------
figFileName = pathFigs + year + '_' + dataType + '_best.png' figFileName = pathFigs + year + '_' + dataType + '_best.png'
heightInInch2 = heightInInch heightInInch2 = heightInInch
...@@ -447,29 +496,64 @@ if __name__ == '__main__': ...@@ -447,29 +496,64 @@ if __name__ == '__main__':
heightInInch2 *= 2. heightInInch2 *= 2.
figScatterB = plt.figure(figsize=(widthInInch, heightInInch2)) figScatterB = plt.figure(figsize=(widthInInch, heightInInch2))
if dataType != 'temperature': if dataType != 'temperature':
pltSctB1 = figScatterB.add_subplot(231) pltIndex = 1
pltSctB2 = figScatterB.add_subplot(232) cols = max(nBestPlots, nBestPlots2)
pltSctB3 = figScatterB.add_subplot(233) pltSctB1 = figScatterB.add_subplot(2,cols,pltIndex)
pltSctB4 = figScatterB.add_subplot(234) pltIndex = 2
pltSctB5 = figScatterB.add_subplot(235) if bestMAE != bestRMSE:
pltSctB6 = figScatterB.add_subplot(236) pltSctB2 = figScatterB.add_subplot(2,cols,pltIndex)
pltIndex += 1
if bestMBE != bestRMSE and bestMBE != bestMAE:
pltSctB3 = figScatterB.add_subplot(2,cols,pltIndex)
pltIndex = cols + 1
pltSctB4 = figScatterB.add_subplot(2,cols,pltIndex)
pltIndex += 1
if bestMAE2 != bestRMSE2:
pltSctB5 = figScatterB.add_subplot(2,cols,pltIndex)
pltIndex += 1
if bestMBE2 != bestRMSE2 and bestMBE2 != bestMAE2:
pltSctB6 = figScatterB.add_subplot(2,cols,pltIndex)
else: else:
pltSctB1 = figScatterB.add_subplot(131) pltIndex = 1
pltSctB2 = figScatterB.add_subplot(132) cols = nBestPlots
pltSctB3 = figScatterB.add_subplot(133) pltSctB1 = figScatterB.add_subplot(1,cols,pltIndex)
stations[bestRMSE].createPlots(pltSctB1) pltIndex = 2
stations[bestMAE].createPlots(pltSctB2) if bestMAE != bestRMSE:
stations[bestMBE].createPlots(pltSctB3) pltSctB2 = figScatterB.add_subplot(1,cols,pltIndex)
if len(stations2) > bestRMSE: pltIndex += 1
stations2[bestRMSE].createPlots(pltSctB4) if bestMBE != bestRMSE and bestMBE != bestMAE:
if len(stations2) > bestMAE: pltSctB3 = figScatterB.add_subplot(1,cols,pltIndex)
stations2[bestMAE].createPlots(pltSctB5)
if len(stations2) > bestMBE: stations[bestRMSE].createPlots(pltSctB1, True)
stations2[bestMBE].createPlots(pltSctB6) if bestMAE != bestRMSE:
figScatterB.subplots_adjust(wspace=0.3, hspace=0.3) stations[bestMAE].createPlots(pltSctB2)
if bestMBE != bestRMSE and bestMBE != bestMAE:
stations[bestMBE].createPlots(pltSctB3)
if len(stations2) > 0:
stations2[bestRMSE2].createPlots(pltSctB4, True)
if len(stations2) > 0 and bestMAE2 != bestRMSE2:
stations2[bestMAE2].createPlots(pltSctB5)
if len(stations2) > 0 and bestMBE2 != bestRMSE2 \
and bestMBE2 != bestMAE2:
stations2[bestMBE2].createPlots(pltSctB6)
figScatterB.subplots_adjust(wspace=space, hspace=space)
figScatterB.savefig(figFileName, dpi=600, format='png') figScatterB.savefig(figFileName, dpi=600, format='png')
plt.close() plt.close()
figScatterB.clf() figScatterB.clf()
# plotting best distribution
figFileName = pathFigs + year + '_' + dataType + '_bestDist.png'
figDistB = plt.figure(figsize=(widthInInch, heightInInch2))
if dataType == 'temperature':
stations[bestRMSE].createDistPlots(figDistB.add_subplot(111))
else:
pltDistB1 = figDistB.add_subplot(2, 1, 1)
stations[bestRMSE].createDistPlots(pltDistB1)
pltDistB2 = figDistB.add_subplot(2, 1, 2)
stations2[bestRMSE2].createDistPlots(pltDistB2)
figDistB.subplots_adjust(wspace=space, hspace=space)
figDistB.savefig(figFileName, dpi=600, format='png')
plt.close()
figDistB.clf()
#- year's statistics --------------------------------------------------- #- year's statistics ---------------------------------------------------
if createMap: if createMap:
......
...@@ -15,8 +15,8 @@ import cartopy.io.shapereader as shpreader ...@@ -15,8 +15,8 @@ import cartopy.io.shapereader as shpreader
#from geopandas.tools import sjoin #from geopandas.tools import sjoin
dType = 'TMP' #data type dType = 'ASWDIR' #data type
year = '2013' year = '2015'
path = 'D:/WetterdatenPamore/' + year + '/' \ path = 'D:/WetterdatenPamore/' + year + '/' \
+ year + '_'+ dType + '.h5' + year + '_'+ dType + '.h5'
...@@ -54,4 +54,10 @@ with h5py.File(path, 'r') as f: ...@@ -54,4 +54,10 @@ with h5py.File(path, 'r') as f:
ax.scatter(refPos[1], refPos[0], transform=data_crs, c='black') ax.scatter(refPos[1], refPos[0], transform=data_crs, c='black')
ax.scatter(posRight[1], posRight[0], transform=data_crs, c='red') ax.scatter(posRight[1], posRight[0], transform=data_crs, c='red')
ax.scatter(posDown[1], posDown[0], transform=data_crs, c='blue') ax.scatter(posDown[1], posDown[0], transform=data_crs, c='blue')
print('Top left - Lat:', lat[0,0], " Lon:", lon[0,0])
print('Top Right - Lat:', lat[0,len(lat[0]) - 1], " Lon:", lon[0,len(lon[0]) - 1])
print('Bottom left - Lat:', lat[len(lat) - 1,0], " Lon:", lon[len(lon) - 1,0])
print('Bottom Right - Lat:', lat[len(lat) - 1,len(lat[0]) - 1], " Lon:", lon[len(lon) - 1,len(lon[0]) - 1])
plt.show() plt.show()
\ No newline at end of file
...@@ -19,6 +19,7 @@ import h5py ...@@ -19,6 +19,7 @@ import h5py
import math import math
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import seaborn as sns
class WeatherStation: class WeatherStation:
# member variables --------------------- # member variables ---------------------
...@@ -414,3 +415,102 @@ class WeatherStation: ...@@ -414,3 +415,102 @@ class WeatherStation:
fig.set_xlabel('m / s') fig.set_xlabel('m / s')
if printAxis: if printAxis:
fig.set_ylabel('m / s') fig.set_ylabel('m / s')
def createDistPlots(self, fig):
"""(matplotlib.axes, bool) -> (void)
creates a scatter plot of obersvation data and model data (x and y,
repsectively). the scatter plot is added to the matplotlib axes given
at function call.
@remark: this re-uses some code of the differences() function. may be
this can be done less verbose and more well structed"""
differences = self.getDifferences();
valuesObs = []
valuesObsBounds = []
valuesMod = []
xAxis = []
histBins = []
i = 0
total = len(self.observationData)
if (len(self.modelData) < total): total = len(self.modelData)
if self.type == 'solar' \
or self.type == 'diffuseIrridiation' \
or self.type == 'globalIrridiation':
histBins = [0, 25, 50, 75, 100, 125, 150, 175, 200, 225, 250, \
275, 300, 325, 350, 375, 400, 425, 450, 475, 500, 525,\
575, 600, 625, 650, 675, 700, 725, 750, 775, 800, 825,\
850, 875, 900, 925, 950, 975, 1000]
for i in range(0, total):
if (self.observationData[i] > -500. \
and self.observationData[i] < 2000.):
valuesObs.append(self.observationData[i])
valuesObsBounds.append(self.error)
valueMod = self.modelData[i]
if self.type == 'solar' \
or self.type == 'diffuseIrridiation' \
or self.type == 'globalIrridiation':
joulePerCm2ToWhPerM2 = 1. * 10000. / 3600.
valuesObs[-1] *= joulePerCm2ToWhPerM2
valueMod = 0.
if i > 0 :
valueMod= self.modelData[i] \
* self.minuteIndicator[i] / 60. \
+ self.modelData[i - 1] \
* (60 - self.minuteIndicator[i - 1]) / 60.
valuesMod.append(valueMod)
xAxis.append(i)
else:
if self.type == 'temperature':
histBins = [-15, -14, -13, -12, -11, -10, -9, -8, -7, -6, \
-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,\
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, \
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, \
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]
elif self.type == 'windDirection':
histBins = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, \
100, 110, 120, 130, 140, 150, 160, 170, 180, 190, \
200, 210, 220, 230, 240, 250, 260, 270, 280, 290, \
300, 310, 320, 330, 340, 350, 360]
else:
histBins = [0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5,\
6, 6.5, 7, 7.5, 8, 8.5, 9, 9.5, 10, 10.5, \
11, 11.5, 12, 12.5, 13, 13.5, 14, 14.5, 15, 15.5,\
16, 16.5, 17, 17.5, 18, 18.5, 19, 19.5, 20, 20.5,\
21, 21.5, 22, 22.5, 23, 23.5, 24, 24.5, 25, 25.5,\
26, 26.5, 27, 27.5, 28, 28.5, 29, 29.5, 30, 30.5,\
31, 31.5, 32, 32.5, 33, 33.5, 34, 34.5, 35]
for i in range(0, total):
if (self.observationData[i] > -500. \
and self.observationData[i] < 2000.):
valuesObs.append(self.observationData[i])
valuesMod.append(self.modelData[i])
# plt.plot(differences, label = "diff")
# plt.plot(valuesObs, label = "measure")
# plt.plot(valuesMod, label = "model")
# upper = np.array(valuesObs) + np.array(valuesObsBounds)
# lower = np.array(valuesObs) - np.array(valuesObsBounds)
# plt.fill_between(xAxis, lower, upper)
# plt.title(self.getName() + ' ' + self.type + ' Run')
# plt.legend()
# plt.show()
# plt.figure()
#ax = plt.subplots()
#ax1 = sns.distplot(valuesObs, color="tab:orange", label='Observation')
#ax2 = sns.distplot(valuesMod, color="tab:blue", label='Model')
fig.hist(valuesObs, bins=histBins, color="tab:orange",
histtype='step', density=True, label='Observation')
fig.hist(valuesMod, bins=histBins, color="tab:blue",
histtype='step', density=True, label='Model')
fig.set_title(self.getName())
if self.type == 'temperature':
fig.set_xlabel("Temperature in °C")
elif self.type == 'solar' \
or self.type == 'diffuseIrridiation':
fig.set_xlabel('DHI in W / m$^2$')
elif self.type == 'globalIrridiation':
fig.set_xlabel('GHI in W / m$^2$')
elif self.type == 'windDirection':
fig.set_xlabel('Wind Direction in Deg')
else:
fig.set_xlabel('Wind speed in m / s')
fig.set_ylabel('Relative Occurences in -')
fig.legend()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment