Commit ca184d54 authored by Gerrit Erichsen's avatar Gerrit Erichsen

Yet another step to completion

parent f549eb13
......@@ -8,9 +8,9 @@ Created on Thu Aug 29 08:00:58 2019
from station import WeatherStation
import h5py
import math
import os
import numpy as np
import matplotlib.pyplot as plt
import os
def readMetaData(dataType, year, path):
"""(str, str, str) --> ([[str,str,str]])"""
......@@ -43,8 +43,8 @@ def readMetaData(dataType, year, path):
end = '20181231'
elif int(end[:4]) > 2018 and dataType == 'solar':
end = '20190808'
ids.append([entries[0], entries[4], entries[5],\
entries[1], end])
ids.append([entries[0], entries[6], entries[4], \
entries[5], entries[1], end])
return ids
def getFullFileName(dataType, stationId, start, end, path):
......@@ -128,15 +128,37 @@ def findClosestCell(latGrid, lonGrid, targetLat, targetLon):
i += 1
return (iBest, jBest)
def calculateMAE(differences):
return abs(np.array(differences)).mean()
def calculateMBE(differences):
return np.array(differences).mean()
def findWorstStation(stations):
"""([WeatherStation]) -> (int,int,int)"""
worstRMSE = 0
worstMAE = 0
worstMBE = 0
i = 0
while i < len(stations):
if stations[i].getRMSE() > stations[worstRMSE].getRMSE():
worstRMSE = i
if stations[i].getMAE() > stations[worstMAE].getMAE():
worstMAE = i
if abs(stations[i].getMBE()) > abs(stations[worstMBE].getMBE()):
worstMBE = i
i += 1
return worstRMSE, worstMAE, worstMBE
def calculateRMSE(differences):
return math.sqrt(np.square(np.array(differences)).mean())
def findBestStation(stations):
"""([WeatherStation]) -> (int,int,int)"""
bestRMSE = 0
bestMAE = 0
bestMBE = 0
i = 0
while i < len(stations):
if stations[i].getRMSE() < stations[bestRMSE].getRMSE():
bestRMSE = i
if stations[i].getMAE() < stations[bestMAE].getMAE():
bestMAE = i
if abs(stations[i].getMBE()) < abs(stations[bestMBE].getMBE()):
bestMBE = i
i += 1
return bestRMSE, bestMAE, bestMBE
def calcDistance(latL, lonL, latR, lonR):
"""(float,float,float,float) -> (float)"""
......@@ -163,34 +185,65 @@ if __name__ == '__main__':
lat,lon = readGrid(fileNameH5)
print("Done reading grid.")
stations = []
#metaData = metaData[0:1]
i = 1
total = len(metaData)
for station in metaData:
fileName = getFullFileName(dataType, station[0],\
station[3], station[4], path)
station[4], station[5], path)
if fileName == '':
print("Done", i, "/", total)
i+=1
continue
dummy = WeatherStation(station[1], station[0], station[2], station[3])
dummy.readInCdcFile(fileName, year, cdcColumn)
if (len(dummy.m_measuredData) == 0):
print("Done", i, "/", total)
i+=1
continue
stations.append(WeatherStation(station[0], station[0],\
station[1], station[2]))
stations[-1].readInCdcFile(fileName, year, cdcColumn)
latLonTuple = findClosestCell(lat, lon, stations[-1].getLatitude(),\
stations.append(dummy)
bLat, bLon = findClosestCell(lat, lon, stations[-1].getLatitude(), \
stations[-1].getLongitude())
stations[-1].readInModelFile(fileNameH5, dataTypeShort, \
latLonTuple[0], latLonTuple[1])
stations[-1].readInModelFile(fileNameH5, dataTypeShort, bLat, bLon)
print("Done", i, "/", total)
i+=1
rmse = []
mae = []
mbe = []
i = 1
total = len(stations)
for station in stations:
differences = station.getDifferences()
print("Statistics are: ----------------------------------")
print(calculateRMSE(differences), calculateMAE(differences), \
calculateMBE(differences))
plt.plot(differences, label = "diff")
plt.plot(station.getMeasuredData(), label = "measure")
plt.plot(station.getModelData(), label = "model")
plt.title(station.getName())
plt.legend()
station.calculateStats()
rmse.append(station.getRMSE())
mae.append(station.getMAE())
mbe.append(station.getMBE())
fig1 = plt.figure()
fig1.add_subplot(1,3,0)
plt.boxplot(rmse)
plt.title("RMSE")
fig1.add_subplot(1,3,1)
plt.boxplot(mae)
plt.title("MAE")
fig1.add_subplot(1,3,2)
plt.boxplot(mbe)
plt.title("MBE")
worstRMSE, worstMAE, worstMBE = findWorstStation(stations)
bestRMSE, bestMAE, bestMBE = findBestStation(stations)
stations[worstRMSE].createPlots()
stations[worstMAE].createPlots()
stations[worstMBE].createPlots()
stations[bestRMSE].createPlots()
stations[bestMAE].createPlots()
stations[bestMBE].createPlots()
\ No newline at end of file
......@@ -6,6 +6,9 @@ Created on Thu Aug 29 10:35:29 2019
"""
import h5py
import math
import numpy as np
import matplotlib.pyplot as plt
class WeatherStation:
m_name = ""
......@@ -14,6 +17,13 @@ class WeatherStation:
m_lon = ""
m_measuredData = []
m_modelData = []
m_mae = 0.
m_mbe = 0.
m_rmse = 0.
m_obsMax = 0.
m_obsMin = 0.
m_obsAvg = 0.
m_error = 0.2
def __init__(self, name, identifier, lat, lon):
self.m_name = name
......@@ -23,12 +33,20 @@ class WeatherStation:
def getName(self):
return self.m_name
def getIdentifier(self):
return self.m_id
def getLatitude(self):
return float(self.m_lat)
def getLongitude(self):
return float(self.m_lon)
def getMeasuredData(self):
return self.m_measuredData
def getRMSE(self):
return self.m_rmse
def getMAE(self):
return self.m_mae
def getMBE(self):
return self.m_mbe
def getModelData(self):
data = []
for element in self.m_modelData:
......@@ -39,12 +57,13 @@ class WeatherStation:
differences = []
i = 0
totalLength = len(self.m_measuredData)
print(totalLength)
if (len(self.m_modelData) < totalLength):
totalLength = len(self.m_modelData)
while (i < totalLength):
#TODO: make type dependent
differences.append(self.m_measuredData[i] - self.m_modelData[i] \
if (self.m_measuredData[i] > -500. and
self.m_measuredData[i] < 2000.):
differences.append(self.m_measuredData[i] - self.m_modelData[i] \
+ 273.15)
i += 1
return differences;
......@@ -76,4 +95,68 @@ class WeatherStation:
def readInModelFile(self, h5FileName, dataType, row, col):
with h5py.File(h5FileName, 'r') as h5File:
self.m_modelData = h5File[dataType][:,row,col]
\ No newline at end of file
self.m_modelData = h5File[dataType][:,row,col]
def calculateStats(self):
differences = self.getDifferences();
self.m_mae = WeatherStation.calculateMAE(differences);
self.m_mbe = WeatherStation.calculateMBE(differences);
self.m_rmse = WeatherStation.calculateRMSE(differences);
values = []
for element in self.m_measuredData:
if (element > -500. and element < 2000.):
values.append(element);
valueArray = np.array(values);
self.m_obsMin = valueArray.min()
self.m_obsMax = valueArray.max()
self.m_obsAvg = valueArray.mean()
def calculateMAE(differences):
return abs(np.array(differences)).mean()
def calculateMBE(differences):
return np.array(differences).mean()
def calculateRMSE(differences):
return math.sqrt(np.square(np.array(differences)).mean())
def createPlots(self):
differences = self.getDifferences();
valuesObs = []
valuesObsBounds = []
valuesMod = []
xAxis = []
i = 0
total = len(self.m_measuredData)
if (len(self.m_modelData) < total): total = len(self.m_modelData)
while i < total:
if (self.m_measuredData[i] > -500. \
and self.m_measuredData[i] < 2000.):
valuesObs.append(self.m_measuredData[i])
valuesObsBounds.append(self.m_error)
valuesMod.append(self.m_modelData[i] - 273.15)
xAxis.append(i)
i += 1
plt.figure()
plt.plot(differences, label = "diff")
plt.plot(valuesObs, label = "measure")
plt.plot(valuesMod, label = "model")
upper = np.array(valuesObs) + np.array(valuesObsBounds)
lower = np.array(valuesObs) - np.array(valuesObsBounds)
plt.fill_between(xAxis, lower, upper)
plt.title(self.getName())
plt.legend()
plt.show()
plt.figure()
plt.scatter(valuesObs, valuesMod, s=1, color="tab:red")
plt.plot(valuesObs, valuesObs)
maxX = np.array(valuesObs).max()
annotation = 'RMSE: %.3f\n' \
'MAE: %.3f\n' \
'MBE: %.3f\n' \
'Range: %.3f\n' \
'Avg: %.3f' % (self.m_rmse, self.m_mae, self.m_mbe, \
self.m_obsMax - self.m_obsMin, \
self.m_obsAvg)
plt.annotate(annotation, (maxX / 2,0))
plt.show()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment