The URI of TUHH Docker Registry changed from "docker.rz.tu-harburg.de:5000" to "docker.rz.tu-harburg.de". Please update your gitlab-ci.yml files if you use images from this registry.

Commit 746ae958 authored by Gerrit Erichsen's avatar Gerrit Erichsen

BugFixes: Wrong deletion behaviour fixed, by centralizing it (also index comparison was wrong)

Features: Added histograms
Generates plots now, only for different RMSE/MAE/MBE, so number of subplots is reduced.
parent 45ad43de
......@@ -18,8 +18,8 @@ Created on Thu Aug 29 08:00:58 2019
from station import WeatherStation
import dwdfilehelper as dwd
import math
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm
......@@ -189,7 +189,7 @@ if __name__ == '__main__':
ind = findStationId(stations, station[0])
if fileName == '':
stations.remove(stations[ind])
if ind >= len(stations2):
if ind < len(stations2) and len(stations2) > 0:
stations2.remove(stations2[ind])
print("Done", i, "/", total)
i+=1
......@@ -203,13 +203,6 @@ if __name__ == '__main__':
if len(stations2) > 0:
stations2[ind].clear()
stations[ind].readInCdcFile(fileName, year, cdcColumn)
if (len(stations[ind].getMeasuredData()) == 0):
stations.remove(stations[ind])
if ind >= len(stations2):
stations2.remove(stations2[ind])
print("Done", i, "/", total)
i+=1
continue
if (stations[ind].getLatId() < 0) \
or (dataType == 'solar' and yearAsInt == 2015):
bLat, bLon = dwd.findClosestCell(lat, lon, \
......@@ -234,13 +227,6 @@ if __name__ == '__main__':
fileNameH5l = fileNameH5 + '_' + lower + '.h5'
stations[ind].readInModelFile(fileNameH5l, dataTypeShort, \
yearAsInt)
if len(stations[ind].modelData) == 0:
stations.remove(stations[ind])
if ind >= len(stations2):
stations2.remove(stations2[ind])
print("Done", i, "/", total)
i+=1
continue
if dataType == 'solar' or dataType == 'wind':
secondType = ''
if (dataType == 'solar'):
......@@ -254,12 +240,6 @@ if __name__ == '__main__':
station[2], station[3], \
secondType))
stations2[ind].readInCdcFile(fileName, year, cdcColumn + 1)
if (len(stations2[ind].observationData) == 0):
stations.remove(stations[ind])
tations2.remove(stations2[ind])
print("Done", i, "/", total)
i+=1
continue
if dataType == 'solar':
stations2[ind].readInModelFile(fileName2H5, \
dataTypeShort2,\
......@@ -282,13 +262,23 @@ if __name__ == '__main__':
stations2[ind].readInModelFile(fileNameH5l, \
dataTypeShort2, \
yearAsInt)
if (len(stations2[ind].modelData) == 0):
if len(stations[ind].modelData) == 0 \
or len(stations[ind].observationData) == 0:
stations.remove(stations[ind])
if ind < len(stations2) and len(stations2) > 0:
stations2.remove(stations2[ind])
print("Done", i, "/", total)
i+=1
continue
elif ind < len(stations2) and len(stations2) > 0:
if len(stations2[ind].modelData) == 0 \
or len(stations2[ind].observationData) == 0:
stations.remove(stations[ind])
stations2.remove(stations2[ind])
print("Done", i, "/", total)
i+=1
continue
print("Done read", i, "/", total)
i+=1
......@@ -332,7 +322,7 @@ if __name__ == '__main__':
print("Done stats1", i, "/", total)
i += 1
i = 1
total = len(stations)
total = len(stations2)
for station in stations2:
station.calculateStats()
rmse2.append(station.getRMSE())
......@@ -406,40 +396,99 @@ if __name__ == '__main__':
plt.close()
figStat2.clf()
#finding out, who did worst/best -----------------------------------
#finding out, who did worst/best ----------------------------------
worstRMSE, worstMAE, worstMBE = findWorstStation(stations)
bestRMSE, bestMAE, bestMBE = findBestStation(stations)
#plotting worst ----------------------------------------------------
nWorstPlots = 1
if (worstMAE != worstRMSE): nWorstPlots += 1
if (worstMBE != worstRMSE and worstMBE != worstMAE): nWorstPlots += 1
nBestPlots = 1
if (bestMAE != bestRMSE): nBestPlots += 1
if (bestMBE != bestRMSE and bestMBE != bestMAE): nBestPlots += 1
worstRMSE2 = 0
worstMAE2 = 0
worstMBE2 = 0
nWorstPlots2 = 0
bestRMSE2 = 0
bestMAE2 = 0
bestMBE2 = 0
nBestPlots2 = 0
if len(stations2) > 0:
worstRMSE2, worstMAE2, worstMBE2 = findWorstStation(stations2)
bestRMSE2, bestMAE2, bestMBE2 = findBestStation(stations2)
nWorstPlots2 = 1
if (worstMAE2 != worstRMSE2): nWorstPlots2 += 1
if (worstMBE2 != worstRMSE2 and worstMBE2 != worstMAE2): nWorstPlots2 += 1
nBestPlots2 = 1
if (bestMAE2 != bestRMSE2): nBestPlots2 += 1
if (bestMBE2 != bestRMSE2 and bestMBE2 != bestMAE2): nBestPlots2 += 1
#plotting worst ---------------------------------------------------
figFileName = pathFigs + year + '_' + dataType + '_worst.png'
heightInInch2 = heightInInch
if dataType != 'temperature':
heightInInch2 *= 2.
figScatterW = plt.figure(figsize=(widthInInch, heightInInch2))
if dataType != 'temperature':
pltSctW1 = figScatterW.add_subplot(231)
pltSctW2 = figScatterW.add_subplot(232)
pltSctW3 = figScatterW.add_subplot(233)
pltSctW4 = figScatterW.add_subplot(234)
pltSctW5 = figScatterW.add_subplot(235)
pltSctW6 = figScatterW.add_subplot(236)
pltIndex = 1
cols = max(nWorstPlots, nWorstPlots2)
pltSctW1 = figScatterW.add_subplot(2,cols,pltIndex)
pltIndex = 2
if worstMAE != worstRMSE:
pltSctW2 = figScatterW.add_subplot(2,cols,pltIndex)
pltIndex += 1
if worstMBE != worstRMSE and worstMBE != worstMAE:
pltSctW3 = figScatterW.add_subplot(2,cols,pltIndex)
pltIndex = cols + 1
pltSctW4 = figScatterW.add_subplot(2,cols,pltIndex)
pltIndex += 1
if worstMAE2 != worstRMSE2:
pltSctW5 = figScatterW.add_subplot(2,cols,pltIndex)
pltIndex += 1
if worstMBE2 != worstRMSE2 and worstMBE2 != worstMAE2:
pltSctW6 = figScatterW.add_subplot(2,cols,pltIndex)
else:
pltSctW1 = figScatterW.add_subplot(131)
pltSctW2 = figScatterW.add_subplot(132)
pltSctW3 = figScatterW.add_subplot(133)
pltIndex = 1
cols = nWorstPlots
pltSctW1 = figScatterW.add_subplot(1,cols,pltIndex)
pltIndex = 2
if worstMAE != worstRMSE:
pltSctW2 = figScatterW.add_subplot(1,cols,pltIndex)
pltIndex += 1
if worstMBE != worstRMSE and worstMBE != worstMAE:
pltSctW3 = figScatterW.add_subplot(1,cols,pltIndex)
stations[worstRMSE].createPlots(pltSctW1, True)
stations[worstMAE].createPlots(pltSctW2)
stations[worstMBE].createPlots(pltSctW3)
if len(stations2) > worstRMSE:
stations2[worstRMSE].createPlots(pltSctW4, True)
if len(stations2) > worstMAE:
stations2[worstMAE].createPlots(pltSctW5)
if len(stations2) > worstMBE:
stations2[worstMBE].createPlots(pltSctW6)
figScatterW.subplots_adjust(wspace=0.3, hspace=0.3)
if worstMAE != worstRMSE:
stations[worstMAE].createPlots(pltSctW2)
if worstMBE != worstRMSE and worstMBE != worstMAE:
stations[worstMBE].createPlots(pltSctW3)
if len(stations2) > 0:
stations2[worstRMSE2].createPlots(pltSctW4, True)
if len(stations2) > 0 and worstMAE2 != worstRMSE2:
stations2[worstMAE2].createPlots(pltSctW5)
if len(stations2) > 0 and worstMBE2 != worstRMSE2 \
and worstMBE2 != worstMAE2:
stations2[worstMBE2].createPlots(pltSctW6)
space = 0.3
if dataType == 'temperature': space = 0.2
figScatterW.subplots_adjust(wspace=space, hspace=space)
figScatterW.savefig(figFileName, dpi=600, format='png')
plt.close()
figScatterW.clf()
# plotting worst distribution
figFileName = pathFigs + year + '_' + dataType + '_worstDist.png'
figDistW = plt.figure(figsize=(widthInInch, heightInInch2))
if dataType == 'temperature':
stations[worstRMSE].createDistPlots(figDistW.add_subplot(111))
else:
pltDistW1 = figDistW.add_subplot(2, 1, 1)
stations[worstRMSE].createDistPlots(pltDistW1)
pltDistW2 = figDistW.add_subplot(2, 1, 2)
stations2[worstRMSE2].createDistPlots(pltDistW2)
figDistW.subplots_adjust(wspace=space, hspace=space)
figDistW.savefig(figFileName, dpi=600, format='png')
plt.close()
figDistW.clf()
# plotting best ----------------------------------------------------
figFileName = pathFigs + year + '_' + dataType + '_best.png'
heightInInch2 = heightInInch
......@@ -447,29 +496,64 @@ if __name__ == '__main__':
heightInInch2 *= 2.
figScatterB = plt.figure(figsize=(widthInInch, heightInInch2))
if dataType != 'temperature':
pltSctB1 = figScatterB.add_subplot(231)
pltSctB2 = figScatterB.add_subplot(232)
pltSctB3 = figScatterB.add_subplot(233)
pltSctB4 = figScatterB.add_subplot(234)
pltSctB5 = figScatterB.add_subplot(235)
pltSctB6 = figScatterB.add_subplot(236)
pltIndex = 1
cols = max(nBestPlots, nBestPlots2)
pltSctB1 = figScatterB.add_subplot(2,cols,pltIndex)
pltIndex = 2
if bestMAE != bestRMSE:
pltSctB2 = figScatterB.add_subplot(2,cols,pltIndex)
pltIndex += 1
if bestMBE != bestRMSE and bestMBE != bestMAE:
pltSctB3 = figScatterB.add_subplot(2,cols,pltIndex)
pltIndex = cols + 1
pltSctB4 = figScatterB.add_subplot(2,cols,pltIndex)
pltIndex += 1
if bestMAE2 != bestRMSE2:
pltSctB5 = figScatterB.add_subplot(2,cols,pltIndex)
pltIndex += 1
if bestMBE2 != bestRMSE2 and bestMBE2 != bestMAE2:
pltSctB6 = figScatterB.add_subplot(2,cols,pltIndex)
else:
pltSctB1 = figScatterB.add_subplot(131)
pltSctB2 = figScatterB.add_subplot(132)
pltSctB3 = figScatterB.add_subplot(133)
stations[bestRMSE].createPlots(pltSctB1)
stations[bestMAE].createPlots(pltSctB2)
stations[bestMBE].createPlots(pltSctB3)
if len(stations2) > bestRMSE:
stations2[bestRMSE].createPlots(pltSctB4)
if len(stations2) > bestMAE:
stations2[bestMAE].createPlots(pltSctB5)
if len(stations2) > bestMBE:
stations2[bestMBE].createPlots(pltSctB6)
figScatterB.subplots_adjust(wspace=0.3, hspace=0.3)
pltIndex = 1
cols = nBestPlots
pltSctB1 = figScatterB.add_subplot(1,cols,pltIndex)
pltIndex = 2
if bestMAE != bestRMSE:
pltSctB2 = figScatterB.add_subplot(1,cols,pltIndex)
pltIndex += 1
if bestMBE != bestRMSE and bestMBE != bestMAE:
pltSctB3 = figScatterB.add_subplot(1,cols,pltIndex)
stations[bestRMSE].createPlots(pltSctB1, True)
if bestMAE != bestRMSE:
stations[bestMAE].createPlots(pltSctB2)
if bestMBE != bestRMSE and bestMBE != bestMAE:
stations[bestMBE].createPlots(pltSctB3)
if len(stations2) > 0:
stations2[bestRMSE2].createPlots(pltSctB4, True)
if len(stations2) > 0 and bestMAE2 != bestRMSE2:
stations2[bestMAE2].createPlots(pltSctB5)
if len(stations2) > 0 and bestMBE2 != bestRMSE2 \
and bestMBE2 != bestMAE2:
stations2[bestMBE2].createPlots(pltSctB6)
figScatterB.subplots_adjust(wspace=space, hspace=space)
figScatterB.savefig(figFileName, dpi=600, format='png')
plt.close()
figScatterB.clf()
# plotting best distribution
figFileName = pathFigs + year + '_' + dataType + '_bestDist.png'
figDistB = plt.figure(figsize=(widthInInch, heightInInch2))
if dataType == 'temperature':
stations[bestRMSE].createDistPlots(figDistB.add_subplot(111))
else:
pltDistB1 = figDistB.add_subplot(2, 1, 1)
stations[bestRMSE].createDistPlots(pltDistB1)
pltDistB2 = figDistB.add_subplot(2, 1, 2)
stations2[bestRMSE2].createDistPlots(pltDistB2)
figDistB.subplots_adjust(wspace=space, hspace=space)
figDistB.savefig(figFileName, dpi=600, format='png')
plt.close()
figDistB.clf()
#- year's statistics ---------------------------------------------------
if createMap:
......
......@@ -15,8 +15,8 @@ import cartopy.io.shapereader as shpreader
#from geopandas.tools import sjoin
dType = 'TMP' #data type
year = '2013'
dType = 'ASWDIR' #data type
year = '2015'
path = 'D:/WetterdatenPamore/' + year + '/' \
+ year + '_'+ dType + '.h5'
......@@ -54,4 +54,10 @@ with h5py.File(path, 'r') as f:
ax.scatter(refPos[1], refPos[0], transform=data_crs, c='black')
ax.scatter(posRight[1], posRight[0], transform=data_crs, c='red')
ax.scatter(posDown[1], posDown[0], transform=data_crs, c='blue')
print('Top left - Lat:', lat[0,0], " Lon:", lon[0,0])
print('Top Right - Lat:', lat[0,len(lat[0]) - 1], " Lon:", lon[0,len(lon[0]) - 1])
print('Bottom left - Lat:', lat[len(lat) - 1,0], " Lon:", lon[len(lon) - 1,0])
print('Bottom Right - Lat:', lat[len(lat) - 1,len(lat[0]) - 1], " Lon:", lon[len(lon) - 1,len(lon[0]) - 1])
plt.show()
\ No newline at end of file
......@@ -19,6 +19,7 @@ import h5py
import math
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
class WeatherStation:
# member variables ---------------------
......@@ -414,3 +415,102 @@ class WeatherStation:
fig.set_xlabel('m / s')
if printAxis:
fig.set_ylabel('m / s')
def createDistPlots(self, fig):
"""(matplotlib.axes, bool) -> (void)
creates a scatter plot of obersvation data and model data (x and y,
repsectively). the scatter plot is added to the matplotlib axes given
at function call.
@remark: this re-uses some code of the differences() function. may be
this can be done less verbose and more well structed"""
differences = self.getDifferences();
valuesObs = []
valuesObsBounds = []
valuesMod = []
xAxis = []
histBins = []
i = 0
total = len(self.observationData)
if (len(self.modelData) < total): total = len(self.modelData)
if self.type == 'solar' \
or self.type == 'diffuseIrridiation' \
or self.type == 'globalIrridiation':
histBins = [0, 25, 50, 75, 100, 125, 150, 175, 200, 225, 250, \
275, 300, 325, 350, 375, 400, 425, 450, 475, 500, 525,\
575, 600, 625, 650, 675, 700, 725, 750, 775, 800, 825,\
850, 875, 900, 925, 950, 975, 1000]
for i in range(0, total):
if (self.observationData[i] > -500. \
and self.observationData[i] < 2000.):
valuesObs.append(self.observationData[i])
valuesObsBounds.append(self.error)
valueMod = self.modelData[i]
if self.type == 'solar' \
or self.type == 'diffuseIrridiation' \
or self.type == 'globalIrridiation':
joulePerCm2ToWhPerM2 = 1. * 10000. / 3600.
valuesObs[-1] *= joulePerCm2ToWhPerM2
valueMod = 0.
if i > 0 :
valueMod= self.modelData[i] \
* self.minuteIndicator[i] / 60. \
+ self.modelData[i - 1] \
* (60 - self.minuteIndicator[i - 1]) / 60.
valuesMod.append(valueMod)
xAxis.append(i)
else:
if self.type == 'temperature':
histBins = [-15, -14, -13, -12, -11, -10, -9, -8, -7, -6, \
-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,\
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, \
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, \
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]
elif self.type == 'windDirection':
histBins = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, \
100, 110, 120, 130, 140, 150, 160, 170, 180, 190, \
200, 210, 220, 230, 240, 250, 260, 270, 280, 290, \
300, 310, 320, 330, 340, 350, 360]
else:
histBins = [0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5,\
6, 6.5, 7, 7.5, 8, 8.5, 9, 9.5, 10, 10.5, \
11, 11.5, 12, 12.5, 13, 13.5, 14, 14.5, 15, 15.5,\
16, 16.5, 17, 17.5, 18, 18.5, 19, 19.5, 20, 20.5,\
21, 21.5, 22, 22.5, 23, 23.5, 24, 24.5, 25, 25.5,\
26, 26.5, 27, 27.5, 28, 28.5, 29, 29.5, 30, 30.5,\
31, 31.5, 32, 32.5, 33, 33.5, 34, 34.5, 35]
for i in range(0, total):
if (self.observationData[i] > -500. \
and self.observationData[i] < 2000.):
valuesObs.append(self.observationData[i])
valuesMod.append(self.modelData[i])
# plt.plot(differences, label = "diff")
# plt.plot(valuesObs, label = "measure")
# plt.plot(valuesMod, label = "model")
# upper = np.array(valuesObs) + np.array(valuesObsBounds)
# lower = np.array(valuesObs) - np.array(valuesObsBounds)
# plt.fill_between(xAxis, lower, upper)
# plt.title(self.getName() + ' ' + self.type + ' Run')
# plt.legend()
# plt.show()
# plt.figure()
#ax = plt.subplots()
#ax1 = sns.distplot(valuesObs, color="tab:orange", label='Observation')
#ax2 = sns.distplot(valuesMod, color="tab:blue", label='Model')
fig.hist(valuesObs, bins=histBins, color="tab:orange",
histtype='step', density=True, label='Observation')
fig.hist(valuesMod, bins=histBins, color="tab:blue",
histtype='step', density=True, label='Model')
fig.set_title(self.getName())
if self.type == 'temperature':
fig.set_xlabel("Temperature in °C")
elif self.type == 'solar' \
or self.type == 'diffuseIrridiation':
fig.set_xlabel('DHI in W / m$^2$')
elif self.type == 'globalIrridiation':
fig.set_xlabel('GHI in W / m$^2$')
elif self.type == 'windDirection':
fig.set_xlabel('Wind Direction in Deg')
else:
fig.set_xlabel('Wind speed in m / s')
fig.set_ylabel('Relative Occurences in -')
fig.legend()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment