Methods
Großwetterlagen¶
Luftmassen, woher kommen sie, wie lange bleiben sie und welche lokalen und jahreszeitlichen Witterungsmerkmale sind damit verbunden. Welche neigen dazu, das kritische Schwellen der Lufteigenschaften überschritten werden können.
Großwetterlagen









Abb.: Verteilung von beitragenden Großwetterlagen an bestimmten Wertebereichen verschiedener Parameter: meteorologische Mesgröße, Luftqualität, Gesundheit. Mittel über alle Orte
Parallelkoordinaten¶
Zusammenspiel aller möglichen Einflussgrößen mit der Zielgröße durch filtern.

Abb.: Übersichtsdiagramm der direkten Abhängigkeiten zwischen den meteorologischen Messgrößen, Luftqualität und Patienten. Es zeigt ausßerdem Filtermöglichkeiten nach der Temperatur (rot) bzw. Patienten (magenta). Mittel über alle Orte
Statistik¶
Schwellen¶
| Parameter | Unit | -1.5 σ | -1.0 σ | -0.5 σ | Ø | +0.5 σ | +1.0 σ | +1.5 σ |
|---|---|---|---|---|---|---|---|---|
| temperature | °C | -11.4 | -7.6 | -3.8 | 10.6 | 3.8 | 7.6 | 11.4 |
| humidity | % | -19.1 | -12.7 | -6.4 | 75.0 | 6.4 | 12.7 | 19.1 |
| airpressure | hPa | -12.5 | -8.3 | -4.2 | 1006.6 | 4.2 | 8.3 | 12.5 |
| sunshine | h | -6.4 | -4.3 | -2.1 | 4.7 | 2.1 | 4.3 | 6.4 |
| windspeed | m/s | -2.0 | -1.3 | -0.7 | 3.6 | 0.7 | 1.3 | 2.0 |
| nox | -45.7 | -30.5 | -15.2 | 65.4 | 15.2 | 30.5 | 45.7 | |
| ozone | -34.4 | -22.9 | -11.5 | 46.3 | 11.5 | 22.9 | 34.4 | |
| particles | -17.0 | -11.3 | -5.7 | 16.9 | 5.7 | 11.3 | 17.0 | |
| admissions | N | -34.3 | -22.8 | -11.4 | 62.2 | 11.4 | 22.8 | 34.3 |
Anteile¶
| Parameter | % | -1.5 σ | % | -1.0 σ | % | -0.5 σ | % | Ø | % | +0.5 σ | % | +1.0 σ | % | +1.5 σ | % |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| temperature | 6.4 | 10.2 | 16.4 | 18.0 | 13.8 | 17.3 | 12.3 | 5.7 | |||||||
| humidity | 7.4 | 11.3 | 14.0 | 15.8 | 15.2 | 17.7 | 13.6 | 5.0 | |||||||
| airpressure | 6.4 | 9.0 | 13.7 | 19.8 | 21.9 | 14.2 | 8.4 | 6.5 | |||||||
| sunshine | 0.0 | 25.6 | 16.6 | 12.0 | 12.8 | 13.7 | 9.7 | 9.6 | |||||||
| windspeed | 2.6 | 11.4 | 21.1 | 20.0 | 17.7 | 13.0 | 6.0 | 8.1 | |||||||
| nox | 0.2 | 12.2 | 21.9 | 24.6 | 17.2 | 10.5 | 5.6 | 7.8 | |||||||
| ozone | 8.0 | 11.0 | 12.9 | 16.5 | 20.4 | 15.4 | 9.7 | 6.0 | |||||||
| particles | 0.0 | 2.4 | 35.4 | 26.8 | 15.1 | 7.6 | 3.7 | 9.2 | |||||||
| admissions | 3.8 | 12.3 | 17.3 | 22.2 | 15.4 | 13.2 | 8.3 | 7.7 |
Entscheidungsbaum¶
Trainieren¶

Testen¶

Abb.: Kreuzvalidierung des Prognosemodells mit einem hohen Zusammenhang. Der Beitrag der einzelnen Faktoren sind durch Balken aufgetragen.
Code¶
Loading
# coding: utf-8
# -*- coding: iso-8859-1 -*-
import sys
import os
import matplotlib
matplotlib.use('Agg') # Must be before importing matplotlib.pyplot or pylab!
import matplotlib.pyplot as P
import numpy as N
from matplotlib.offsetbox import AnchoredText
import pandas as pd
from sklearn import tree #For our Decision Tree
import pandas as pd # For our DataFrame
import pydotplus # To create our Decision Tree Graph
from IPython.display import Image # To Display a image of our graph
import numpy as N
import pylab as P
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, roc_auc_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from IPython.display import Image
P.style.use('bmh')
params = {'legend.fontsize': 8,'font.family': 'serif'}
P.rcParams.update(params)
Defining
def moving_average(a,n=3):
ret=N.cumsum(a,dtype=float)
ret[n:]=ret[n:]-ret[:-n]
return ret[n-1:]/n
Reading
data = N.genfromtxt('../csv/collect_bzk.csv',delimiter=';',names=True,dtype=None,encoding=None)
gwls = N.genfromtxt('../csv/gwlneudatum.dat',names=True,dtype=None,encoding=None)
id = N.where((gwls['ja']>=2012)&(gwls['ja']<=2016))[0]
nd = len(id)
gw = N.array(gwls['gw'][id],str)
orte = ['SteglitzZehlendorf',
'Pankow',
'Neukölln',
'Marzahn-Hellersdorf',
'FriedrichshainKreuzberg',
'TreptowKoepenick',
'Lichtenberg',
'Spandau',
'Mitte',
'TempelhofSchoeneberg',
'CharlottenburgWilmersdorf',
'Reinickendorf',
'Potsdam']
no = len(orte)
para = {'temperature':['Temperature [$^\circ$C]',-10,32,2],
'humidity':['Relative Humidity [%]',30,105,5],
'airpressure':['Air Pressure [hPa]',960,1040,5],
'sunshine':['Sunshine Duration [h]',0,13,1],
'windspeed':['Windspeed [m/s]',0,12,1],
'nox':['NOX [nox]',0,320,20],
'ozone':['Ozone [O3]',0,150,10],
'particles':['Particles [PM25]',0,120,10],
'admissions': ['Admissions [N]',0,160,10]
}
Analyzing
for par in para:
tx = 0
for ort in orte:
id = N.where(data['orte']==ort)[0]
tx = tx + N.array(data[par][id],float)
if(par!='admissions'): tx = tx/float(no)
fig = P.figure(figsize=(10,6))
ax = P.subplot(111)
bin = N.arange(para[par][1],para[par][2],para[par][3]);nb = len(bin)
num = N.zeros(nb,float)
tem = N.zeros(nb,float)
for ib in range(nb-1):
bu = bin[ib]
bo = bin[ib+1]
ind = N.where((tx>=bu)&(tx<=bo))[0]
tmp = list(set(gw[ind]))
num[ib] = len(tmp)
tem[ib] = len(ind)
if(len(tmp)>0):
rnk = []
for ig in tmp:
jnd = N.where(ig==gw[ind])[0]
rnk.append(len(jnd))
rnk = N.array(rnk)
ind = N.argsort(rnk)
for i in range(len(tmp)):
if(i<len(tmp)-2):
P.text(bu,0.5+i,tmp[ind[i]],fontsize=8,horizontalalignment='center')
else:
P.text(bu,0.3+i,tmp[ind[i]],fontsize=8,color='r',horizontalalignment='center',weight='bold')
P.bar(bin,num,para[par][3],color='g',alpha=0.3,edgecolor='k')
P.xlabel(para[par][0],fontsize=14,weight='bold')
P.ylim(0,31)
P.ylabel('Number of Contributing Grosswetterlagen',fontsize=14,weight='bold')
ax.tick_params(direction='out')
P.tight_layout()
P.savefig('./img/gwl_%s.png'%par,dpi=240,bbox_inches='tight',pad_inches=0.0)#,transparent='true')
Plotting
fig = P.figure(figsize=(12,5))
ax = P.subplot(111)
df = pd.DataFrame()
#orte = ['Mitte'];no = len(orte)
for par in para:
dum = N.empty(nd,dtype='object')
tx = 0
for ort in orte:
id = N.where(data['orte']==ort)[0]
tx = tx + N.array(data[par][id],float)
if(par!='admissions'): tx = tx/float(no)
tx = moving_average(tx,7)
if(par=='temperature'): x1 = tx
if(par=='humidity'): x2 = tx
if(par=='sunshine'): x3 = tx
if(par=='airpressure'): x4 = tx
if(par=='windspeed'): x5 = tx
if(par=='ozone'): x6 = tx
if(par=='nox'): x7 = tx
if(par=='particles'): x8 = tx
if(par=='admissions'): y1 = tx
print ('**%s**|%.1f|%.1f|%.1f|**%.1f**|%.1f|%.1f|%.1f'%(par,-1.5*N.std(tx),-1.0*N.std(tx),-0.5*N.std(tx),N.mean(tx),0.5*N.std(tx),1.0*N.std(tx),1.5*N.std(tx)))
bin = N.array([-9.9,-1.5,-1.0,-0.5,0.0,0.5,1.0,1.5,9.9]);nb = len(bin)
sym = ['','-4s','-3s','-2s','-1s','+1s','+2s','+3s','+4s']
bn = []
for b in range(1,nb):
bu = bin[b-1]*N.std(tx)+N.mean(tx)
bo = bin[b]*N.std(tx)+N.mean(tx)
id = N.where((tx>=bu)&(tx<bo))[0]
bn.append(100.*len(id)/nd)
dum[id] = sym[b]
df[par] = tx#dum
print ('**%s**|%.1f||%.1f||%.1f||%.1f||%.1f||%.1f||%.1f||%.1f'%(par,bn[0],bn[1],bn[2],bn[3],bn[4],bn[5],bn[6],bn[7]))
print (df)
x1 = (x1-N.mean(x1))/N.std(x1)
x2 = (x2-N.mean(x2))/N.std(x2)
x3 = (x3-N.mean(x3))/N.std(x3)
x4 = (x4-N.mean(x4))/N.std(x4)
x5 = (x5-N.mean(x5))/N.std(x5)
x6 = (x6-N.mean(x6))/N.std(x6)
x7 = (x7-N.mean(x7))/N.std(x7)
x8 = (x8-N.mean(x8))/N.std(x8)
y1 = (y1-N.mean(y1))/N.std(y1)
for d in range(nd-7):
if(x1[d]>1.5):
P.plot([1,2,3,4,5,6,7,8,9],[x1[d],x2[d],x3[d],x4[d],x5[d],x6[d],x7[d],x8[d],y1[d]],'r',alpha=0.8,lw=0.1,zorder=10)
elif(y1[d]>1.0):
P.plot([1,2,3,4,5,6,7,8,9],[x1[d],x2[d],x3[d],x4[d],x5[d],x6[d],x7[d],x8[d],y1[d]],'m',alpha=0.8,lw=0.1,zorder=10)
else:
P.plot([1,2,3,4,5,6,7,8,9],[x1[d],x2[d],x3[d],x4[d],x5[d],x6[d],x7[d],x8[d],y1[d]],'k',alpha=0.1,lw=0.1,zorder=5)
for i in [1,2,3,4,5,6,7,8,9]:
P.plot([i,i],[-3,3],'g',lw=0.5,zorder=15)
at = AnchoredText("MA=7, LAG=0",prop=dict(size=12),frameon=True,loc='upper left')
at.patch.set_boxstyle("round,pad=0.,rounding_size=0.2")
ax.add_artist(at)
at = AnchoredText("2012-2016",prop=dict(size=12),frameon=True,loc='lower right')
at.patch.set_boxstyle("round,pad=0.,rounding_size=0.2")
ax.add_artist(at)
P.ylabel('Normalized Values',fontsize=16,weight='bold')
P.ylim(-3,3)
P.yticks(fontsize=12,weight='bold')
P.xticks([1,2,3,4,5,6,7,8,9],['Temperature','Humidity','Sunshine','AirPressure','Windspeed','Ozone','NOX','Particles','Admissions'],fontsize=12,weight='bold')
ax.tick_params(direction='out')
P.tight_layout()
P.savefig('./img/parallel.png',dpi=240,bbox_inches='tight',pad_inches=0.0)#,transparent='true')
Decision
para = ['temperature','humidity','airpressure','sunshine','windspeed','nox','admissions','particles','ozone']
for k in ['admissions']:
if(k=='ozone'):
para = ['temperature','humidity','sunshine']
y = N.array(df.ozone,int) # Target variable
else:
para = ['temperature','humidity','airpressure','windspeed','sunshine','ozone','nox','particles']
y = N.array(df.admissions,int) # Target variable
x = df[para]
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.3,random_state=1,shuffle=True)
clf = tree.DecisionTreeClassifier(criterion="gini",splitter='best')
clf_train = clf.fit(x_train,y_train)
class_names = clf.classes_
print (class_names)
#n_nodes = clf.tree_.node_count
#children_left = clf.tree_.children_left
#children_right = clf.tree_.children_right
#feature = clf.tree_.feature
#threshold = clf.tree_.threshold
#from sklearn.tree.export import export_text
#tree_rules = export_text(clf, feature_names=para)
#print(tree_rules)
#print (n_nodes,children_left,children_right,feature,threshold)
imp = clf.tree_.compute_feature_importances(normalize=True)
y_pred = clf.predict(x_test)
cc = N.corrcoef(y_test,y_pred)[0,1]
P.figure(figsize=(8,7))
ax = P.subplot(211)
P.scatter(y_test,y_pred,s=20,c='None',ec='k')
ax.tick_params(direction='out')
# if(k=='ozone'):
# tit = 'Ozone ~ Temperature + Humidity + Sunshine'
# else:
# tit = 'Admissions ~ Temperature + Humidity + Airpressure + Sunshine + Windspeed + Ozone + Nox + Particles'
P.ylabel('Y Predict',fontsize=14,weight='bold')
P.xlabel('Y Test',fontsize=14,weight='bold')
# P.title(tit,fontsize=10,weight='bold')
at = AnchoredText("MA=7, LAG=0",prop=dict(size=12),frameon=True,loc='upper left')
at.patch.set_boxstyle("round,pad=0.,rounding_size=0.2")
ax.add_artist(at)
at = AnchoredText("R = %.2f"%cc,prop=dict(size=14),frameon=True,loc='lower right')
at.patch.set_boxstyle("round,pad=0.,rounding_size=0.2")
ax.add_artist(at)
ax = P.subplot(212)
P.bar(N.arange(len(para)),imp,0.6,color='r',ec='k',alpha=0.6)
P.xticks(N.arange(len(para)),para,rotation=45,fontsize=12,weight='bold')
P.ylabel('Feature Importance',fontsize=12,weight='bold')
ax.tick_params(direction='out')
P.tight_layout()
P.savefig('./img/score_%s.png'%k,dpi=240,bbox_inches='tight',pad_inches=0.0)#,transparent='true')
dot_data = tree.export_graphviz(clf_train,out_file=None,feature_names=para,class_names=N.array(class_names,str),rounded=True,filled=True,max_depth=4)
graph = pydotplus.graph_from_dot_data(dot_data)
graph.write_png('./img/tree.png')
Image(graph.create_png())