Skip to content

Decision

Decision Tree

Amman: >30mm/d (1982-2021)

Step 1: categorization

MON SSTA T850A GWL RAIN
Jan cold warm WZ Y
Feb warm cold HM N
Mar hot icy TRM

Step 2: Decision Tree, random, training ⅔, testing ⅓

Figure: Existing Classification of European Weather-Types after Hess/Breszowsky.

Figure:| —|— upper left:|Monthly distribution of heavy rainfall events >30 mm/d in Amman. upper center:|Distribution of heavy rainfall events in Amman dependent on SST anomalies: cold|warm|hot. upper right:|Distribution of heavy rainfall events in Amman dependent on T850 anomalies: warm|cold|icy. upper right:|Distribution of heavy rainfall events in Amman dependent on European Weather-Types: WZ|HM|TRM.

Decision Tree

graph LR
  A[MONTH<sub>t</sub>] --> B[SSTA<sub>t</sub>] --> C[T850A<sub>t</sub>] --> D[GWL<sub>t-3</sub>] --> D[GWL<sub>t</sub>] --> F[RAIN<sub>t</sub>];

Code

Importing

import sys
import os
import matplotlib
matplotlib.use('Agg') # Must be before importing matplotlib.pyplot or pylab!
import matplotlib.pyplot as plt
import matplotlib.pyplot as P
from mpl_toolkits.basemap import Basemap,shiftgrid,maskoceans
from matplotlib.path import Path
from matplotlib.patches import PathPatch
import shapefile
#from netCDF4 import Dataset
import numpy as N
from netCDF4 import Dataset, num2date,date2num
import datetime
from scipy import signal,stats
import matplotlib.colors as colors
#from datetime import datetime
from matplotlib.offsetbox import AnchoredText
#from scipy.interpolate import griddata
#import scipy
import matplotlib as mpl
import pandas as pd
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation
from sklearn import tree


plt.style.use('seaborn-talk')

params = {      'legend.fontsize': 8,\
                'font.family': 'serif',\
                }
plt.rcParams.update(params)

def ma(a,n=3):
    ret=N.cumsum(a,dtype=float)
    ret[n:]=ret[n:]-ret[:-n]
    return ret[n-1:]/n

Reading

city = {

   'Amman':[35.930359,31.963158],
   'Aqaba':[35.00778,29.52667],
   'WadiMusa':[35.480125,30.321635],
}

ort = 'Amman'
#ort = 'Aqaba'

jo = N.arange(1981,2022,1);nj = len(jo)
mo = N.arange(1,13,1);nm = len(mo)
mon = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

file = '../../data/gwl/gwlneudatum.dat'
dd = N.genfromtxt(file,usecols=(0),skip_header=1,dtype='i')
mm = N.genfromtxt(file,usecols=(1),skip_header=1,dtype='i')
jj = N.genfromtxt(file,usecols=(2),skip_header=1,dtype='i')
gw = N.genfromtxt(file,usecols=(3),skip_header=1,dtype='str')

id = N.where((jj>=1982)&(jj<=2021))[0]#&((mm==9)|(mm==10)|(mm==11)|(mm==12)|(mm==1)|(mm==2)|(mm==3)|(mm==4)))[0]

gw = gw[id]

gw[gw=='U'] = 'WZ'
gw[gw=='WW'] = 'WZ'
gw[gw=='WA'] = 'WZ'
#gw[gw=='WS'] = 'WZ'


gw[gw=='TRW'] = 'SWZ'
#gw[gw=='SZ'] = 'SWZ'

gw[gw=='TM'] = 'TRM'

gw[gw=='HB'] = 'NWA'

gw[gw=='SA'] = 'S'
gw[gw=='SZ'] = 'S'

gw[gw=='NA'] = 'N'
gw[gw=='NZ'] = 'N'

gw[gw=='NEA'] = 'NE'
gw[gw=='NEZ'] = 'NE'

'''

gw[gw=='TM'] = 'TRM'

gw[gw=='SEA'] = 'SE'
gw[gw=='SEZ'] = 'SE'

gw[gw=='SWA'] = 'SW'
gw[gw=='SWZ'] = 'SW'

gw[gw=='HNFA'] = 'HNF'
gw[gw=='HNFZ'] = 'HNF'

gw[gw=='U'] = 'W'
gw[gw=='WA'] = 'W'
gw[gw=='WZ'] = 'W'
gw[gw=='WW'] = 'W'
gw[gw=='WS'] = 'W'

gw[gw=='NEA'] = 'NE'
gw[gw=='NEZ'] = 'NE'

gw[gw=='NWA'] = 'NW'
gw[gw=='NWZ'] = 'NW'

gw[gw=='NA'] = 'N'
gw[gw=='NZ'] = 'N'

gw[gw=='HNA'] = 'HN'
gw[gw=='HNZ'] = 'HN'

gw[gw=='HFA'] = 'HF'
gw[gw=='HFZ'] = 'HF'

gw[gw=='SA'] = 'S'
gw[gw=='SZ'] = 'S'

'''

go = N.array(list(set(gw)));ng = len(go)

print (go)

file = '../../data/ClimateExplorer/isstoiv2_daily_anom_20-35E_30-40N_n_su.dat'
#file = '../../data/ClimateExplorer/isstoiv2_daily_mean_20-35E_30-40N_n_su.dat'

dd = N.genfromtxt(file,usecols=(0),comments='#',dtype='str')
st = N.genfromtxt(file,usecols=(1),comments='#',dtype='f')

jj = []
mm = []

for d in dd:

    jj.append(int(d[0:4]))
    mm.append(int(d[4:6]))

jj = N.array(jj)
mm = N.array(mm)

id = N.where((jj>=1982)&(jj<=2021))[0]#&((mm==9)|(mm==10)|(mm==11)|(mm==12)|(mm==1)|(mm==2)|(mm==3)|(mm==4)))[0]

st = st[id]

file = '../../data/ClimateExplorer/iera5_t850_daily_20-35E_30-40N_n_su.dat'
dd = N.genfromtxt(file,usecols=(0),comments='#',dtype='str')


file = '../../data/ClimateExplorer/iera5_t850_daily_20-35E_30-40N_n_su_a.txt'
#dd = N.genfromtxt(file,usecols=(0),comments='#',dtype='str')
tp = N.genfromtxt(file,usecols=(1),comments='#',dtype='f')

jj = []
mm = []

for d in dd:

    jj.append(int(d[0:4]))
    mm.append(int(d[4:6]))

jj = N.array(jj)
mm = N.array(mm)

id = N.where((jj>=1982)&(jj<=2021))[0]#&((mm==9)|(mm==10)|(mm==11)|(mm==12)|(mm==1)|(mm==2)|(mm==3)|(mm==4)))[0]

tp = tp[id]

#file = '../../data/ClimateExplorer/v2p0chirps_25_34-40E_29-34N.cdf'
#file = '../../data/ClimateExplorer/era5_tp_daily_af_34-40E_29-34N_su.cdf'
file = '../../data/ClimateExplorer/pr_W5E5v2.0_19810101-20191231.cdf'

nc = Dataset(file,'r')
lon = N.array(nc.variables['lon'][:])
lat = N.array(nc.variables['lat'][:])
dat = N.array(nc.variables['tp'][:])
tim = nc.variables['time']

tim = num2date(tim[:],units=tim.units,calendar=tim.calendar) 

jj = []
mm = []
dd = []

for it in tim:

    jj.append(it.year)
    mm.append(it.month)
    dd.append(it.day)

jj = N.array(jj)
mm = N.array(mm)
dd = N.array(dd)

id = N.where((jj>=1982)&(jj<=2021))[0]#&((mm==9)|(mm==10)|(mm==11)|(mm==12)|(mm==1)|(mm==2)|(mm==3)|(mm==4)))[0]

jj = jj[id]
mm = mm[id]
dd = dd[id]

dat = dat[id,:,:]

nd = len(dd)

nc.close()

Processing

nom = {'day':[],'month':[],'year':[],'gwl':[],'sst':[],'Amman':[],'WadiMusa':[],'Aqaba':[],'t850':[]}
cat = {'mon':[],'sst':[],'t850':[],'gwl0':[],'gwl1':[],'Amman':[],'WadiMusa':[],'Aqaba':[]}

sstu = N.percentile(st,25)
ssto = N.percentile(st,75)

t850u = N.percentile(tp,25)
t850o = N.percentile(tp,75)

lo,la = N.meshgrid(lon,lat)

for d in range(3,nd):

    nom['day'].append(dd[d])
    nom['month'].append(mm[d])
    nom['year'].append(jj[d])
    nom['gwl'].append(gw[d])
    nom['sst'].append(st[d])
    nom['t850'].append(tp[d])    

    cat['gwl0'].append(gw[d])
    cat['gwl1'].append(gw[d-3])
    cat['mon'].append(mon[mm[d]-1])

    if(st[d]<sstu): cat['sst'].append('cold')
    elif((st[d]>=sstu)&(st[d]<=ssto)): cat['sst'].append('warm')
    else: cat['sst'].append('hot')

    if(tp[d]<t850u): cat['t850'].append('icy')
    elif((tp[d]>=t850u)&(tp[d]<=t850o)): cat['t850'].append('cold')
    else: cat['t850'].append('warm')

    for c in city: 

        distance = (lo-city[c][0])**2 + (la-city[c][1])**2
        iy,ix = N.where(distance==distance.min())

        nom[c].append(dat[d,iy[0],ix[0]])

        if(dat[d,iy[0],ix[0]]<=15): cat[c].append('N')
        else: cat[c].append('Y')

ig = []

for g in go:

    id = N.where(g==gw)[0]

    ig.append(len(id))

ig = N.array(ig)
ig = N.argsort(ig)

go = go[ig[::-1]]

Attribution

P.figure(figsize=(12,6))

k = 0

for c in city:

    k = k+1

    ax = P.subplot(1,3,k)

    tmp = N.zeros((ng,nm),float)

    for g in range(ng):
        for m in range(nm):

            id = N.where((N.array(nom['month'])==mo[m])&(N.array(nom['gwl'])==go[g]))[0]

            tmp[g,m] = N.nanmean(N.array(nom[c])[id])

    P.imshow(tmp,cmap=P.get_cmap('YlGnBu'),aspect=0.45, vmin=0, vmax=10)

    ax = plt.gca();

    # Major ticks
    ax.set_xticks(N.arange(0,12,1))
    ax.set_yticks(N.arange(0,ng,1))

    # Labels for major ticks
    ax.set_xticklabels(mon,fontsize=8)
    ax.set_yticklabels(go,fontsize=8)

    # Minor ticks
    ax.set_xticks(N.arange(-.5,12,1), minor=True)
    ax.set_yticks(N.arange(-.5,ng,1), minor=True)

    ax.grid(which='minor',color='k',linestyle='-',linewidth=1)

    P.title(c,fontsize=14,weight='bold')

P.tight_layout()

plt.savefig('./img/decision.png',dpi=240,transparent=False,bbox_inches='tight',pad_inches=0.0)

Transition

tmp = N.zeros((nm,ng,ng),float)

for d in range(1,nd):

    im = mm[d]-1
    ig = N.where(gw[d-1]==go)[0]
    jg = N.where(gw[d]==go)[0]

    tmp[im,ig,jg] = tmp[im,ig,jg] + 1

tmp = tmp/float(nj)

tmp[tmp==0] = N.nan

P.figure(figsize=(15,5))

for m in range(nm):

    ax = P.subplot(2,6,1+m)

    P.imshow(tmp[m,:,:],cmap=P.get_cmap('Spectral_r'),vmin=0, vmax=1)

    ax = plt.gca();

    # Major ticks
    ax.set_xticks(N.arange(0,ng,1))
    ax.set_yticks(N.arange(0,ng,1))

    # Labels for major ticks
    ax.set_xticklabels(go,fontsize=5,rotation=90)
    ax.set_yticklabels(go,fontsize=5)

    # Minor ticks
    ax.set_xticks(N.arange(-.5,30,1), minor=True)
    ax.set_yticks(N.arange(-.5,30,1), minor=True)

    ax.grid(which='minor',color='k',linestyle='-',linewidth=0.5)

    P.title(mon[m],fontsize=14,weight='bold')

P.tight_layout()

plt.savefig('./img/decision.png',dpi=240,transparent=False,bbox_inches='tight',pad_inches=0.0)

SST

tmp = N.zeros((nj,nm),float)

for j in range(nj):
    for m in range(nm):

        id = N.where((jj==jo[j])&(mm==mo[m]))[0]

        tmp[j,m] = N.mean(st[id])

P.figure(figsize=(15,5))

P.imshow(tmp.T,cmap=P.get_cmap('bwr'),vmin=-2,vmax=2)

ax = plt.gca();

# Major ticks
ax.set_xticks(N.arange(0,nj,1))
ax.set_yticks(N.arange(0,nm,1))

# Labels for major ticks
ax.set_xticklabels(jo,fontsize=10,rotation=90)
ax.set_yticklabels(mon,fontsize=10)

# Minor ticks
ax.set_xticks(N.arange(-.5,nj,1), minor=True)
ax.set_yticks(N.arange(-.5,nm,1), minor=True)

ax.grid(which='minor',color='k',linestyle='-',linewidth=0.5)

P.tight_layout()

plt.savefig('./img/decision.png',dpi=240,transparent=False,bbox_inches='tight',pad_inches=0.0)

Modelling

df = pd.DataFrame.from_dict(cat)

trees = DecisionTreeClassifier()
X_train,X_test,y_train,y_test = train_test_split(df[['mon','sst','t850','gwl0','gwl1']],df[ort],test_size=0.3,random_state=1)

x_train = pd.get_dummies(X_train,drop_first=True)
x_test = pd.get_dummies(X_test,drop_first=True)

clf = trees.fit(x_train,y_train)
y_pred = clf.predict(x_test)
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

print ([*x_train])

tree.export_graphviz(clf,out_file="./img/tree.dot",feature_names = [*x_train],class_names=['N','Y'],filled = True,max_depth=7,impurity=False)

y_test = N.array(y_test)
X_test = N.array(X_test)

nd = len(y_test)
do = N.arange(nd)

test = N.zeros(nd,float)
pred = N.zeros(nd,float)
both = N.zeros(nd,float)

for d in range(nd):

    if(y_test[d]=='Y'): 

       test[d] = 1

       print (X_test[d])

print ('---------------')

for d in range(nd):

    if(y_pred[d]=='Y'): 

       pred[d] = 1

       print (X_test[d])

print ('---------------')

for d in range(nd):

    if((y_pred[d]=='Y')&(y_test[d]=='Y')): 

       print (X_test[d])

       both[d] = 1

P.figure(figsize=(14,7))

X_test = N.array(X_test)
X_train = N.array(X_train)

id = N.where(y_test=='Y')[0]
jd = N.where(y_train=='Y')[0]

P.subplot(2,3,1)

yo = []
zo = []

for x in mon:

    ij = N.where(N.array(X_test[id,0])==x)[0]
    yo.append(len(ij))

    ij = N.where(N.array(X_train[jd,0])==x)[0]
    zo.append(len(ij))

yo = N.array(yo)
zo = N.array(zo)

xo = N.arange(len(yo))

P.bar(xo,yo,width=0.8,color='g',edgecolor='k',lw=1,alpha=0.5)
#P.bar(xo,zo,width=0.8,color='g',lw=0.5,alpha=0.5)

P.xticks(xo,mon,rotation=90)
P.ylabel('Number')
P.title('Months')

P.subplot(2,3,2)

so = ['cold','warm','hot']

zo = []
yo = []

for x in so:

    ij = N.where(N.array(X_test[id,1])==x)[0]
    yo.append(len(ij))

    ij = N.where(N.array(X_train[jd,1])==x)[0]
    zo.append(len(ij))


yo = N.array(yo)
zo = N.array(zo)
xo = N.arange(len(yo))

P.bar(xo,yo,width=0.8,color='r',edgecolor='k',lw=1,alpha=0.5)
#P.bar(xo,zo,width=0.8,color='r',lw=0.5,alpha=0.5)

P.xticks(xo,so)
P.ylabel('Number')

P.title('SSTA')

P.subplot(2,3,3)

so = ['warm','cold','icy']
zo = []
yo = []

for x in so:

    ij = N.where(N.array(X_test[id,2])==x)[0]
    yo.append(len(ij))

    ij = N.where(N.array(X_train[jd,2])==x)[0]
    zo.append(len(ij))

yo = N.array(yo)
zo = N.array(zo)
xo = N.arange(len(yo))

P.bar(xo,yo,width=0.8,color='b',edgecolor='k',lw=1,alpha=0.5)
#P.bar(xo,zo,width=0.8,color='b',lw=0.5,alpha=0.5)

P.xticks(xo,so)
P.ylabel('Number')

P.title('T850A')

ax = P.subplot(2,1,2)

so = go
yo = []
zo = []

for x in so:

    ij = N.where(N.array(X_test[id,3])==x)[0]
    yo.append(len(ij))

    ij = N.where(N.array(X_train[jd,3])==x)[0]
    zo.append(len(ij))

yo = N.array(yo)
zo = N.array(zo)
xo = N.arange(len(yo))

P.bar(xo,yo,width=0.8,color='m',edgecolor='k',lw=1,alpha=0.5)
#P.bar(xo,zo,width=0.8,color='m',lw=0.5,alpha=0.5)

P.xticks(xo,so)
P.xlim(-1,ng+1)
P.ylabel('Number')

P.title('GWL')
P.xticks(rotation=90)

x = N.where(test==1)[0]
y = N.where(pred==1)[0]
z = N.where(both==1)[0]

print (len(x),len(y),len(z))

at = AnchoredText('W5E5-'+ort+': %i|%i|%i (>15mm/d)'%(len(x),len(y),len(z)),prop=dict(size=18,weight='bold'),frameon=True,loc='upper right')
at.patch.set_boxstyle("round,pad=0.,rounding_size=0.1")
ax.add_artist(at)

plt.tight_layout()

plt.savefig('./img/decision_%s.png'%ort,dpi=240,transparent=False,bbox_inches='tight',pad_inches=0.0)