Commit f549eb13 authored by Gerrit Erichsen's avatar Gerrit Erichsen

initial code. tested to do some work, does it. wait for next step

parent 430cf605
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 29 08:00:58 2019
@author: Gerrit Erichsen
"""
from station import WeatherStation
import h5py
import math
import numpy as np
import matplotlib.pyplot as plt
import os
def readMetaData(dataType, year, path):
    """Read the station description file and list stations covering a year.

    (str, str, str) --> ([[str, str, str, str, str]])

    The description file is opened at
    ``<path>/<dataType>/<XX>_Stundenwerte_Beschreibung_Stationen.txt.``
    where ``XX`` is ``ST`` for 'solar', ``FF`` for 'wind' and ``TU`` for any
    other data type (treated as temperature).

    Only lines starting with a digit are evaluated; they are split at single
    blanks and empty fragments are dropped. A station is kept when its start
    year (column 1) is strictly before ``year`` and its end year (column 2)
    is ``year`` or later.

    Returns one list per kept station:
    ``[id, latitude, longitude, start date, end date]`` (all strings, taken
    from columns 0, 4, 5, 1 and 2). An end date later than 2018 is replaced
    by '20190808' for 'solar' and by '20181231' otherwise.

    Raises whatever ``open`` raises when the file is missing, and
    ``ValueError`` if ``year`` or a date column is not numeric.
    """
    if dataType == 'solar':
        qualifier = 'ST'
    elif dataType == 'wind':
        qualifier = 'FF'
    else:
        # assume temperature is supposed to be read
        qualifier = 'TU'
    # NOTE(review): the name ends in '.txt.' (trailing dot). It is kept
    # exactly as before; confirm that this is intended.
    fileName = path + '/' + dataType + '/' + qualifier \
        + '_Stundenwerte_Beschreibung_Stationen.txt.'
    targetYear = int(year)

    ids = []
    # the context manager closes the file even if reading fails
    with open(fileName, "r") as file:
        for line in file:
            # header and separator lines do not start with a station id
            if not line[0].isdigit():
                continue
            # columns are padded with several blanks; drop the empty pieces
            entries = [entry for entry in line.split(' ') if entry]
            # test if year is within date start and date end
            if not (int(entries[1][:4]) < targetYear <= int(entries[2][:4])):
                continue
            end = entries[2]
            # presumably the last day contained in the downloaded archives
            # -- TODO confirm the two cut-off dates
            if int(end[:4]) > 2018:
                end = '20190808' if dataType == 'solar' else '20181231'
            # append id, latitude, longitude, start and (capped) end date
            ids.append([entries[0], entries[4], entries[5], entries[1], end])
    return ids
def getFullFileName(dataType, stationId, start, end, path):
    """Build the path of a station's hourly product file.

    (str, str, str, str, str) --> (str)

    The path has the form
    ``<path>/<dataType>/stundenwerte_<XX>_<...>/produkt_<xx>_stunde_
    <start>_<end>_<stationId>.txt`` where ``XX``/``xx`` is ST/st for
    'solar', FF/ff for 'wind' and TU/tu otherwise. The folder part is
    ``<stationId>_row`` for 'solar' and ``<stationId>_<start>_<end>_hist``
    for everything else.

    Returns the path if it exists on disk, otherwise the empty string.
    """
    # type qualifier appears upper-case in the folder, lower-case in the file
    if dataType == 'solar':
        upperQualifier, lowerQualifier = 'ST', 'st'
    elif dataType == 'wind':
        upperQualifier, lowerQualifier = 'FF', 'ff'
    else:
        # assume temperature is supposed to be read
        upperQualifier, lowerQualifier = 'TU', 'tu'

    # the folder naming is different for solar
    if dataType == 'solar':
        folder = '_' + stationId + '_row/'
    else:
        folder = '_' + stationId + '_' + start + '_' + end + '_hist/'

    candidate = (path + '/' + dataType + '/stundenwerte_' + upperQualifier
                 + folder + 'produkt_' + lowerQualifier
                 + '_stunde_' + start + '_' + end + '_' + stationId + '.txt')
    return candidate if os.path.exists(candidate) else ''
def getFullFileNameH5(dataType, year, path, version=1, height=0):
    """Compose the name of a yearly HDF5 model file.

    (str, str, str, int, int) --> (str)

    The result is ``<path>/<year>/<year>_<NAME>.h5``. ``NAME`` is
    'ASWDIFD' / 'ASWDIR' for 'solar' with version 1 / 2, 'WZU' / 'WMV' for
    'wind' with version 1 / 2 and 'TMP' for every other combination. For
    'wind' the suffix '_50-51' is appended to ``NAME``.

    ``height`` is accepted but not used yet. The file is not checked for
    existence.
    """
    # anything that is not matched below is treated as temperature
    variable = 'TMP'
    if dataType == 'solar':
        if version == 1:
            variable = 'ASWDIFD'
        elif version == 2:
            variable = 'ASWDIR'
    elif dataType == 'wind':
        if version == 1:
            variable = 'WZU'
        elif version == 2:
            variable = 'WMV'

    # wind needs additional file accessor
    #TODO: do stuff
    accessor = '_50-51' if dataType == 'wind' else ''

    return path + '/' + year + '/' + year + '_' + variable + accessor + '.h5'
def readGrid(fileName):
    """Load the coordinate grids of a model file.

    (str) --> (array, array)

    Opens ``fileName`` read-only with h5py and returns the contents of the
    datasets 'latitude' and 'longitude' (each sliced with ``[:, :]``) as the
    pair ``(latitudes, longitudes)``.
    """
    with h5py.File(fileName, 'r') as gridFile:
        latitudes = gridFile['latitude'][:, :]
        longitudes = gridFile['longitude'][:, :]
    return latitudes, longitudes
def findClosestCell(latGrid, lonGrid, targetLat, targetLon):
    """Find the grid cell whose coordinates are nearest to a target point.

    ([[float]],[[float]],float,float)->(int,int)

    Every cell (i, j) of ``latGrid`` / ``lonGrid`` is compared to the target
    by means of ``calcDistance``. The search starts with cell (0, 0) and
    only a strictly smaller distance replaces the current best, so the first
    cell found wins on ties. Returns the index pair ``(i, j)``.
    """
    best = (0, 0)
    smallest = calcDistance(latGrid[0][0], lonGrid[0][0],
                            targetLat, targetLon)
    for i, latRow in enumerate(latGrid):
        for j, cellLat in enumerate(latRow):
            distance = calcDistance(cellLat, lonGrid[i][j],
                                    targetLat, targetLon)
            if distance < smallest:
                best = (i, j)
                smallest = distance
    return best
def calculateMAE(differences):
    """Return the mean absolute error: the mean of the absolute values."""
    magnitudes = np.absolute(np.array(differences))
    return magnitudes.mean()
def calculateMBE(differences):
    """Return the mean bias error: the plain (signed) mean of the values."""
    values = np.array(differences)
    return values.mean()
def calculateRMSE(differences):
    """Return the root mean square error of the values as a float."""
    squared = np.square(np.array(differences))
    meanSquare = squared.mean()
    return math.sqrt(meanSquare)
def calcDistance(latL, lonL, latR, lonR):
    """Return the straight-line distance between two coordinate pairs.

    (float,float,float,float) -> (float)

    The latitude and longitude differences are treated as plain Cartesian
    components; the result is in the same unit as the inputs.
    """
    deltaLat = latL - latR
    deltaLon = lonL - lonR
    return math.sqrt(deltaLat ** 2 + deltaLon ** 2)
if __name__ == '__main__':
    # --- configuration -----------------------------------------------------
    dataType = 'temperature' #either solar, temperature or wind
    year = '2015'
    path = 'D:/WetterdatenCDC'       # station measurements
    path2 = 'D:/WetterdatenPamore'   # HDF5 model files
    # name of the dataset inside the model file
    if dataType == 'solar':
        dataTypeShort = 'ASWDIFD'
    else:
        dataTypeShort = 'WZU' if dataType == 'wind' else 'TMP'
    # index of the value column in the ';'-separated station files
    cdcColumn = 3

    # --- read station list and model grid ----------------------------------
    metaData = readMetaData(dataType, year, path)
    fileNameH5 = getFullFileNameH5(dataType, year, path2)
    lat, lon = readGrid(fileNameH5)

    # --- load measured and modelled series per station ---------------------
    stations = []
    for stationId, stationLat, stationLon, start, end in metaData:
        fileName = getFullFileName(dataType, stationId, start, end, path)
        if fileName == '':
            # no product file on disk for this station
            continue
        # the station id doubles as the station name
        current = WeatherStation(stationId, stationId, stationLat, stationLon)
        stations.append(current)
        current.readInCdcFile(fileName, year, cdcColumn)
        row, col = findClosestCell(lat, lon, current.getLatitude(),
                                   current.getLongitude())
        current.readInModelFile(fileNameH5, dataTypeShort, row, col)

    # --- statistics and plots ----------------------------------------------
    for station in stations:
        differences = station.getDifferences()
        print("Statistics are: ----------------------------------")
        rmse = calculateRMSE(differences)
        mae = calculateMAE(differences)
        mbe = calculateMBE(differences)
        print(rmse, mae, mbe)
        plt.plot(differences, label="diff")
        plt.plot(station.getMeasuredData(), label="measure")
        plt.plot(station.getModelData(), label="model")
        plt.title(station.getName())
        plt.legend()
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 29 10:35:29 2019
@author: Gerrit Erichsen
"""
import h5py
class WeatherStation:
    """Measured and modelled time series of one weather station.

    The measured series is filled by ``readInCdcFile`` (one value per hour,
    gaps padded with ``-999.``), the model series by ``readInModelFile``.
    All accessors currently assume temperature data: model values are
    shifted by 273.15 before they are returned or compared.
    """

    # defaults of the scalar attributes; overwritten per instance in __init__
    m_name = ""
    m_id = ""
    m_lat = ""
    m_lon = ""
    # value inserted for hours that are missing in the station file
    _MISSING_VALUE = -999.

    def __init__(self, name, identifier, lat, lon):
        """Store name, id and position; ``lat``/``lon`` may be str or float."""
        self.m_name = name
        self.m_id = identifier
        self.m_lat = lat
        self.m_lon = lon
        # The series must be created per instance: as class attributes the
        # lists would be shared, and every station would append its values
        # to the same list.
        self.m_measuredData = []
        self.m_modelData = []

    def getName(self):
        """Return the station name."""
        return self.m_name

    def getLatitude(self):
        """Return the latitude converted to float."""
        return float(self.m_lat)

    def getLongitude(self):
        """Return the longitude converted to float."""
        return float(self.m_lon)

    def getMeasuredData(self):
        """Return the list of measured values (including -999. fillers)."""
        return self.m_measuredData

    def getModelData(self):
        """Return the model values reduced by 273.15 as a new list."""
        #TODO: make type dependent
        return [element - 273.15 for element in self.m_modelData]

    def getDifferences(self):
        """Return measured minus model values, element by element.

        The model value is shifted by 273.15 first, i.e. each entry is
        ``measured - model + 273.15``. The result is as long as the shorter
        of the two series.

        NOTE(review): -999. fillers of the measured series are not skipped
        and therefore end up in the differences.
        """
        #TODO: make type dependent
        return [measured - model + 273.15
                for measured, model
                in zip(self.m_measuredData, self.m_modelData)]

    def readInCdcFile(self, fileName, targetYear, column):
        """Append the values of one year from a ';'-separated station file.

        Lines whose second field starts with ``targetYear`` are used; the
        last two characters of that field are taken as the hour and the
        field at index ``column`` as the value. Missing hours between two
        consecutive lines are padded with -999.; it is assumed that never
        an entire day is missing. Blank lines and lines without ';' are
        skipped.

        Raises whatever ``open`` raises, ``ValueError`` for non-numeric
        hour/value fields and ``IndexError`` if ``column`` does not exist.
        """
        lastHour = 23 #entry that was before current one, needed for missing data
        # the context manager closes the file even if parsing fails
        with open(fileName, "r") as file:
            for line in file:
                entries = line.split(';')
                if len(entries) < 2 or not entries[1].startswith(targetYear):
                    continue
                hour = int(entries[1][-2:])
                if hour > lastHour + 1:
                    # gap within the same day
                    missing = hour - lastHour - 1
                elif hour < lastHour and not (hour == 0 and lastHour == 23):
                    # gap across midnight
                    #assuming that there is never an entire day missing
                    missing = 24 - lastHour + hour - 1
                else:
                    missing = 0
                self.m_measuredData.extend([self._MISSING_VALUE] * missing)
                self.m_measuredData.append(float(entries[column]))
                lastHour = hour

    def readInModelFile(self, h5FileName, dataType, row, col):
        """Load the model series of grid cell (row, col) from an HDF5 file.

        Reads ``h5File[dataType][:, row, col]``, i.e. all entries along the
        first axis of the dataset named ``dataType``, and replaces the
        current model series with it.
        """
        with h5py.File(h5FileName, 'r') as h5File:
            self.m_modelData = h5File[dataType][:,row,col]
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment