import matplotlib.pyplot as plt
import matplotlib.cm
import pandas as pd
from mpl_toolkits.basemap import Basemap
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
from matplotlib.colors import Normalize
import warnings
warnings.filterwarnings('ignore')
fig, ax = plt.subplots(figsize=(16,12))
# coordinates are taken from http://boundingbox.klokantech.com/
coorlist = [-133.15,24.29,-61.52,54.37]
rout_color = (204/255.0, 0, 153/255.0, 0.7)#"orange"
bg_color = (0.0, 0.0, 0, 0.0)
coast_color = (0.0, 0.0, 0, 1.0)
#coast_color = (204/255.0, 0, 153/255.0, 0.7)
fl_color = (204/255.0, 0, 153/255.0, 0.7)
m = Basemap(resolution='l', # c, l, i, h, f or None
projection='merc',
llcrnrlon=coorlist[0], llcrnrlat= coorlist[1], urcrnrlon=coorlist[2], urcrnrlat=coorlist[3])
m.drawcountries()
m.drawcoastlines(color=coast_color, linewidth=1.0)
m.fillcontinents(color=bg_color)
m.drawmapboundary(fill_color=bg_color)
m.shadedrelief()
path = pd.read_csv('ALL_data.csv', index_col=False)
resmap = path.groupby(['Flight_instance_ID'])
for ni, pos in enumerate(resmap):
bla = pos[1]
x1 = list(bla.LONP)
y1 = list(bla.LATP)
x, y = m(x1,y1)
# in case of out of box, gives an error
try:
m.drawgreatcircle(x1[0], y1[0], x1[-1], y1[-1], linewidth=0.75, color="red", alpha = 0.2)
m.plot(x, y, 'o', markersize='6', color='#444444', alpha=0.1)
except:
pass
plt.show()
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import zipfile
import pandas as pd
from datetime import datetime
from babel.dates import format_timedelta
#list_ = []
#z = zipfile.ZipFile('CAX_Train_1.zip')
#allFiles = z.namelist()
#df_from_each_file = (pd.read_csv(z.open(f), index_col=False) for f in allFiles[0:1])
#frame = pd.concat(df_from_each_file, ignore_index=True)
list_f_ids = ['676200403231928',
'676200403201250',
'676200306100458',
'676200408241718',
'676200404101643']
st = str(list_f_ids[2])
moddir = 'models_rand_allf/ind_2000run_nocoor/'#'model_2/'
frame = pd.read_csv('some_flights/'+st+'.csv')
#'test_676200312080912.csv'
def to_dattim(y, m, d, h, mi, s):
return pd.Timestamp(int(y), int(m), int(d), int(h), int(mi), int(s))
a = frame.columns
frame["datetime"] = frame.apply(lambda row: to_dattim(row['Year'], row['Month'], row['Day'], row['Hour'], row['Minute'], row['Second']), axis=1)
temp = frame.groupby(['Flight_instance_ID']).datetime.first()
def minus_first(x,y,z):
return y - temp[x]
frame.datetime = frame.apply(lambda row: minus_first(row['Flight_instance_ID'], row['datetime'],temp), axis=1)
import dill
Mod_num = str(5)
modelpath = "models_best/model"+Mod_num+"_fix/model_new_all_phase_"+Mod_num+".pkd"
with open(modelpath,'r') as f:
modelx = dill.load(f)
feat_imp = pd.Series(modelx.get_fscore()).sort_values(ascending=False)
feat_imp[:10].plot(kind='bar', title='Feature Importances')
plt.ylabel('Feature Importance Score')
print modelx.best_score
import dill
def plot_pred(Mod_num, ff_id, axi, start, shuf = False, pred = 1):
lc = ['#008fd5', '#fc4f30', '#e5ae38', '#6d904f', '#8b8b8b', '#810f7c']
zipnam = 'models_best/model'+Mod_num+'/phase'+Mod_num+'_test_separated.zip'
z = zipfile.ZipFile(zipnam)
finzip = 'phase'+Mod_num+'_test_separated/phase'+Mod_num+'_'+ff_id+'.csv'
df = pd.read_csv(z.open(finzip), index_col=False)
modelpath = "models_best/model"+Mod_num+"_fix/model_new_all_phase_"+Mod_num+".pkd"
msi = 6
if shuf:
df = df.sample(frac = 1)
msi = 3
ln = len(df.FF)
y = df.pop('FF')
##strd = pd.Timestamp('2017-1-1 00:00:00')+ pd.Timedelta(start, unit='s')
##endd = strd + pd.Timedelta(ln-1, unit='s')
##x = pd.date_range(start=strd, end=endd, freq = 's')
x = range(start,ln +start)
if Mod_num =='4':
axi.plot(x, y,'o',ms=msi, label = 'Climb')
elif Mod_num =='5':
axi.plot(x, y,'o',ms=msi, label = 'Cruise')
if pred:
df = df.drop(['X','Y','DWPT','Unnamed: 0','Unnamed: 0.1', 'Flight_instance_ID'], axis = 1)
xtest = xgb.DMatrix(df, label = y)
with open(modelpath,'r') as f:
modelx = dill.load(f)
pred = modelx.predict(xtest)
if Mod_num =='4':
axi.plot(x, pred, color = 'red', lw =0.5, label = 'pred. Climb')
elif Mod_num =='5':
axi.plot(x, pred, color = 'black', lw =0.5, label = 'pred. Cruise')
return ln
### plot all phases
import warnings
import sklearn.metrics
import xgboost as xgb
from ipywidgets import StaticInteract, RangeWidget, RadioWidget, DropDownWidget
warnings.filterwarnings('ignore')
lph = ['Unknown','Preflight','Taxi','Takeoff', 'Climb','Cruise','Approach','Rollout']
dat = pd.Timestamp('2017-1-1 00:00:00')
datmax = dat+ frame.datetime.iloc[-1]
def plot_phase(ax,xmi = dat, xma = datmax):
lc = ['#008fd5', '#fc4f30', '#e5ae38', '#6d904f', '#8b8b8b', '#810f7c']
for key, grp in frame.groupby(['Flight_instance_ID','PH']):
ph = grp['PH'].head(1).reset_index(drop=True)[0]
grp['datetime']=grp['datetime'].apply(lambda x: x+dat)
#print grp.datetime.iloc[0], dat, type(grp.datetime.iloc[0]), type(dat)
if ph<7 and ph!=4 and ph!=5:
ax.plot(grp['datetime'], grp['FF'],'o', lw = 2., label = lph[ph],color = lc[ph-1])
elif ph ==4 or ph == 5:
ax.plot(grp['datetime'], grp['FF'],'o', lw = 2., label = lph[ph])
else:
ax.plot(grp['datetime'], grp['FF'],'o', lw = 2., label = lph[ph],color='black')
if ph == 4:
# take series for 4th phase to plot
bla = grp['datetime']
elif ph ==5:
bla5 = grp['datetime']
fig.canvas.draw()
labels = [item.get_text() for item in ax.get_xticklabels()]
labels = [i[3:] for i in labels]
labels = labels[1:]
#labels[1] = 'Testing'
ax.set_xticklabels(labels, fontsize = 18)
labels = [item.get_text() for item in ax.get_yticklabels()]
ax.set_yticklabels(labels, fontsize = 18)
plt.xticks(rotation=45)
ax.set_xlim(xmin = xmi, xmax = xma)
plt.ylabel('Fuel Flow Rate',fontsize = 20)
plt.xlabel('Time of Flight, [min]',fontsize = 20)
#matplotlib.style.use('fivethirtyeight')
fig, ax = plt.subplots(figsize=(15, 9))
plot_phase(ax)
plt.legend(fontsize = 20)
plt.savefig('fig1_allfeatures.png')
plt.show()
fig, ax = plt.subplots(figsize=(15, 9))
a = plot_pred('4', st, ax, start = 0, shuf = 0, pred = 0)
plot_pred( '5', st, ax, start = a, shuf = 0, pred = 0)
plt.legend(fontsize = 24)
plt.savefig('fig2_45features.png')
fig.canvas.draw()
#labels = [item.get_text() for item in ax.get_xticklabels()]
#labels = [i for i in labels]
#labels = labels[1:]
ax.set_xticklabels([str(i) for i in range(-1000,4001,1000)], fontsize = 18)
labels = [item.get_text() for item in ax.get_yticklabels()]
ax.set_yticklabels(labels, fontsize = 18)
plt.xticks(rotation=45)
#ax.set_xlim(xmin = pd.Timestamp('2017-1-1 00:00:00'))
plt.ylabel('fuel flow rate',fontsize = 20)
plt.xlabel('index',fontsize = 20)
plt.show()
fig, ax = plt.subplots(figsize=(15, 9))
a = plot_pred('4', st, ax, start = 0, shuf = 1, pred = 0)
plot_pred( '5', st, ax, start = a, shuf = 1, pred = 0)
plt.legend(fontsize = 24)
plt.savefig('fig3_45features_shuf.png')
fig.canvas.draw()
#labels = [item.get_text() for item in ax.get_xticklabels()]
#labels = [i for i in labels]
#labels = labels[1:]
ax.set_xticklabels([str(i) for i in range(-1000,4001,1000)], fontsize = 18)
labels = [item.get_text() for item in ax.get_yticklabels()]
ax.set_yticklabels(labels, fontsize = 18)
plt.xticks(rotation=45)
#ax.set_xlim(xmin = pd.Timestamp('2017-1-1 00:00:00'))
plt.ylabel('fuel flow rate',fontsize = 20)
plt.xlabel('index',fontsize = 20)
plt.show()
fig, ax = plt.subplots(figsize=(15, 9))
a = plot_pred('4', st, ax, start = 0, shuf = 1, pred = 1)
plot_pred( '5', st, ax, start = a, shuf = 1, pred = 1)
plt.legend(fontsize = 24)
plt.savefig('fig4_45features_shuf_pred.png')
fig.canvas.draw()
#labels = [item.get_text() for item in ax.get_xticklabels()]
#labels = [i for i in labels]
#labels = labels[1:]
ax.set_xticklabels([str(i) for i in range(-1000,4001,1000)], fontsize = 18)
labels = [item.get_text() for item in ax.get_yticklabels()]
ax.set_yticklabels(labels, fontsize = 18)
plt.xticks(rotation=45)
#ax.set_xlim(xmin = pd.Timestamp('2017-1-1 00:00:00'))
plt.ylabel('fuel flow rate',fontsize = 20)
plt.xlabel('index',fontsize = 20)
plt.show()
fig, ax = plt.subplots(figsize=(15, 9))
#min_test=['676200308221151',
# '676200406201308',
# '676200406201655',
# '676201003110757',
# '676200405101740',
# '676200306121943',
# '676200307021338',
# '676200307030733',
# '676200406181158',
# '676200403090417']
#max_test = ['676200310231759',
# '676200306171503',
# '676200306180951',
# '676200406160506',
# '676200405081024',
# '676200402101617',
# '676200305141328']
#st = max_test[0]
a = plot_pred('4', st, ax, start = 0, shuf = 0, pred = 1)
plot_pred( '5', st, ax, start = a, shuf = 0, pred = 1)
plt.legend(fontsize = 24)
plt.savefig('fig4_45features_shuf_pred.png')
fig.canvas.draw()
#labels = [item.get_text() for item in ax.get_xticklabels()]
#labels = [i for i in labels]
#labels = labels[1:]
ax.set_xticklabels([str(i) for i in range(-1000,4001,1000)], fontsize = 18)
labels = [item.get_text() for item in ax.get_yticklabels()]
ax.set_yticklabels(labels, fontsize = 18)
plt.xticks(rotation=45)
#ax.set_xlim(xmin = pd.Timestamp('2017-1-1 00:00:00'))
plt.ylabel('fuel flow rate',fontsize = 20)
plt.xlabel('index',fontsize = 20)
plt.show()
import matplotlib
import matplotlib.pyplot as plt
import dill
import zipfile
import pandas as pd
import xgboost as xgb
import glob
import numpy as np
import matplotlib.mlab as mlab
from sklearn.metrics import r2_score
def get_stats(li):
liflights = []
liflights_id = []
allpoints = []
r2 = 0
for fn in li:
fid = '6'+fn.split('.csv')[0].split('_6')[-1]
df = pd.read_csv(fn)
se = (df.FF-df.Prediction)/df.FF
allpoints += list(se.as_matrix())
liflights.append(np.mean(se))
liflights_id.append((np.mean(se),fid))
r2 +=r2_score(df.FF, df.Prediction)
return (allpoints, liflights, liflights_id,r2/len(liflights))
li = glob.glob('models_best/model4/phase4_test/residuals_phase4_67*')
all_test4, liflights_test4, ids_test4, r2_4te = get_stats(li)
li = glob.glob('models_best/model4/phase4_train/residuals_phase4_67*')
all_train4, liflights_train4, ids_train4, r2_4tr = get_stats(li)
li = glob.glob('models_best/model5/phase5_test/residuals_phase5_6*')
all_test5, liflights_test5, ids_test5, r2_5te = get_stats(li)
li = glob.glob('models_best/model5/phase5_train/residuals_phase5_6*')
all_train5, liflights_train5, ids_train5, r2_5tr = get_stats(li)
f, ((ax1), (ax2)) = plt.subplots(1, 2, figsize=(15, 4.5), sharex='row')#, sharey='row')
n, bins, patches = ax1.hist(np.array(liflights_test4), 25, normed=0, alpha=0.75, label = 'test')
n, bins, patches = ax1.hist(np.array(liflights_train4), 25, normed=0, alpha=0.75, label = 'train')
#m1 = max(liflights_test5)
#m2 = max(liflights_train5)
n, bins, patches = ax2.hist(np.array(liflights_test5), 25, normed=0, alpha=0.75, label = 'test')
n, bins, patches = ax2.hist(np.array(liflights_train5), 25, normed=0, alpha=0.75, label = 'train')
ax1.set_xlabel('(Pr. - F_R) for each fligh',fontsize = 18)
ax2.set_xlabel('(Pr. - F_R) for each fligh',fontsize = 18)
ax1.legend(fontsize = 24)
ax2.legend(fontsize = 24)
ax1.set_ylabel('freq. of flights in bin',fontsize = 18)
ax1.set_title('Phase 4: Climb')
ax2.set_title('Phase 5: Cruise')
ax1.tick_params(axis='both', labelsize=18)
ax2.tick_params(axis='both', labelsize=18)
#plt.ylim(ymax=80)
plt.show()
f, ((ax1,ax2),(ax3,ax4)) = plt.subplots(2, 2, figsize=(15, 8), sharex='col')
def plothi(all_, liflights_, sc, axi):
mu = 0
scale = 6000
variance = np.var(all_)
coef = 1./np.sqrt(len(all_)/len(liflights_))
sigma = np.sqrt(variance)*coef
x = np.linspace(-10./scale, 10./scale, 500)
thr_max = np.percentile(np.array(liflights_),90)
thr_min = np.percentile(np.array(liflights_),10)
hi = np.array(liflights_)
n, bins, patches = axi.hist([hi[np.multiply(hi<thr_max,hi>thr_min)],hi[hi<thr_min],hi[hi>thr_max]], 25, normed=1, alpha=0.75, color = ['C0','C1','C1'], stacked = True, label = 'E_flights')
axi.plot(x,mlab.normpdf(x, mu, sigma)*sc, lw = 2, color = 'C8',label = 'E_model')
axi.legend(fontsize = 20)
axi.tick_params(axis='both', labelsize=18)
plothi(all_train4, liflights_train4, 1./3., ax1)
plothi(all_test4, liflights_test4, 0.083, ax2)
plothi(all_train5, liflights_train5, 1./3., ax3)
plothi(all_test5, liflights_test5, 0.055, ax4)
ax3.set_xlabel('residuals',fontsize = 18)
ax4.set_xlabel('residuals',fontsize = 18)
ax1.set_ylabel('freq. of flights',fontsize = 18)
ax3.set_ylabel('freq. of flights',fontsize = 18)
ax1.set_title('Phase 4: Climb, Train sample', fontsize = 18)
ax2.set_title('Phase 4: Climb, Test sample', fontsize = 18)
ax3.set_title('Phase 5: Climb, Train sample', fontsize = 18)
ax4.set_title('Phase 5: Climb, Test sample', fontsize = 18)
plt.show()
from IPython.display import HTML
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')