Predict fuel flow rate of airplanes

In this Project I am going to predicti fuel flow rate of airplanes during different phases of flight.

The dataset is 12 Gb and 1000 flights

In [1]:
import matplotlib.pyplot as plt
import matplotlib.cm
import pandas as pd 
from mpl_toolkits.basemap import Basemap
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
from matplotlib.colors import Normalize
import warnings
warnings.filterwarnings('ignore')

fig, ax = plt.subplots(figsize=(16,12))
# coordinates are taken from http://boundingbox.klokantech.com/

coorlist = [-133.15,24.29,-61.52,54.37]
rout_color = (204/255.0, 0, 153/255.0, 0.7)#"orange"
bg_color = (0.0, 0.0, 0, 0.0)
coast_color = (0.0, 0.0, 0, 1.0)
#coast_color = (204/255.0, 0, 153/255.0, 0.7)
fl_color = (204/255.0, 0, 153/255.0, 0.7)
m = Basemap(resolution='l', # c, l, i, h, f or None
            projection='merc',
            llcrnrlon=coorlist[0], llcrnrlat= coorlist[1], urcrnrlon=coorlist[2], urcrnrlat=coorlist[3])
m.drawcountries()
m.drawcoastlines(color=coast_color, linewidth=1.0)
m.fillcontinents(color=bg_color)
m.drawmapboundary(fill_color=bg_color)
m.shadedrelief()
path = pd.read_csv('ALL_data.csv', index_col=False)
resmap = path.groupby(['Flight_instance_ID'])

for ni, pos in enumerate(resmap):
    bla = pos[1]
    x1 = list(bla.LONP)
    y1 = list(bla.LATP)
    x, y = m(x1,y1)
    # in case of out of box, gives an error
    try:
        m.drawgreatcircle(x1[0], y1[0], x1[-1], y1[-1], linewidth=0.75, color="lightcoral", alpha = 0.2)
        m.plot(x, y, 'o', markersize='6', color='#444444', alpha=0.1)
    except:
        pass
plt.show()
In [46]:
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
matplotlib.style.use('fivethirtyeight')
import zipfile
import pandas as pd
from datetime import datetime
from babel.dates import format_timedelta


#list_ = []
#z = zipfile.ZipFile('CAX_Train_1.zip')
#allFiles = z.namelist()
#df_from_each_file = (pd.read_csv(z.open(f), index_col=False) for f in allFiles[0:1])
#frame   = pd.concat(df_from_each_file, ignore_index=True)
frame = pd.read_csv('some_flights/676200312080912.csv')
#'test_676200312080912.csv'
def to_dattim(y, m, d, h, mi, s):
    return pd.Timestamp(int(y), int(m), int(d), int(h), int(mi), int(s))

a = frame.columns

frame["datetime"] =  frame.apply(lambda row: to_dattim(row['Year'], row['Month'], row['Day'], row['Hour'], row['Minute'], row['Second']), axis=1)

temp = frame.groupby(['Flight_instance_ID']).datetime.first()

def minus_first(x,y,z):
    return y - temp[x]

frame.datetime = frame.apply(lambda row: minus_first(row['Flight_instance_ID'], row['datetime'],temp), axis=1)

There are 8 different phases of flight in the dataset.

There are around 220 different parameters

I use XGBoost to predict Climbing phase.

In [48]:
### plot all phases
import warnings
warnings.filterwarnings('ignore')
lph = ['Unknown','Preflight','Taxi','Takeoff', 'Climb','Cruise','Approach','Rollout']
dat = pd.Timestamp('2017-1-1 00:00:00')
fig, ax = plt.subplots(figsize=(16, 12))
for key, grp in frame.groupby(['Flight_instance_ID','PH']):
    ph = grp['PH'].head(1).reset_index(drop=True)[0]
    grp['datetime']=grp['datetime'].apply(lambda x: x+dat)
    if ph<7:
        ax.plot(grp['datetime'], grp['FF'],'o', lw = 2., label = lph[ph])
    else:
        ax.plot(grp['datetime'], grp['FF'],'o', lw = 2., label = lph[ph],color='black')
    if ph == 4:
        # take series for 4th phase to plot
        bla = grp['datetime'] 
fig.canvas.draw()
labels = [item.get_text() for item in ax.get_xticklabels()]
labels = [i[3:] for i in labels]
labels = labels[1:]
#labels[1] = 'Testing'
ax.set_xticklabels(labels, fontsize = 18)
labels = [item.get_text() for item in ax.get_yticklabels()]
ax.set_yticklabels(labels, fontsize = 18)

plt.xticks(rotation=45)
plt.xlim(xmin = dat)
plt.ylabel('Fuel Flow Rate',fontsize = 20)
plt.xlabel('Time of Flight, [m]',fontsize = 20)

#### plot models prediction for 4 phase
import dill
def pred_model(fname):
    frame = pd.read_csv(fname)
    y = frame.pop('FF')
    frame = frame.drop(u'Flight_instance_ID',axis = 1)
    frame = frame.drop(u'Unnamed: 0',axis = 1)
    with open('model_2/model_all_phase_4.pkd', 'r') as f:
        model = dill.load(f)
    frame_norm = (frame - frame.mean()) / (frame.max() - frame.min())
    pred = model.predict(frame_norm)
    return pred

pred = pred_model('model_2/test_676200312080912.csv')
plt.plot(bla, pred, '-', lw = 0.5, color = 'black', label = 'pred. Climb')
plt.legend(fontsize = 24)
plt.show()

Climb trajectories differ

In [41]:
import matplotlib.pyplot as plt
import matplotlib.cm
from mpl_toolkits.basemap import Basemap
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
from matplotlib.colors import Normalize
from mpl_toolkits.mplot3d import Axes3D
import pandas as pd
import numpy as np


frame = pd.read_csv("data/ALL_data_phase_4.csv", nrows = 50000)

#frame = frame[frame.Flight_instance_ID == 676200307020557]

def to_dattim(y, m, d, h, mi, s):
    return pd.Timestamp(int(y), int(m), int(d), int(h), int(mi), int(s))

a = frame.columns

frame["datetime"] =  frame.apply(lambda row: to_dattim(row['Year'], row['Month'], row['Day'], row['Hour'], row['Minute'], row['Second']), axis=1)

temp = frame.groupby(['Flight_instance_ID']).datetime.first()

def minus_first(x,y,z):
    return y - temp[x]

frame.datetime = frame.apply(lambda row: minus_first(row['Flight_instance_ID'], row['datetime'],temp), axis=1)

frame['datetime']=frame['datetime'].apply(lambda x: x/ np.timedelta64(1, 's'))

frame=frame[frame['datetime']<1200]


def appendSpherical_np(x,y,z):
    #ptsnew =np.zeros(xyz.shape)
    xy = x**2 + y**2
    R = np.sqrt(xy + z**2)
    PHI = np.arctan2(np.sqrt(xy), z)
    # for elevation angle defined from Z-axis down
    #ptsnew[:,4] = np.arctan2(z, np.sqrt(xy))
    # for elevation angle defined from XY-plane up
    THETA = np.arctan2(y, x)
    return R, PHI, THETA

def plot_traj(df):
    zl = [i for i in df.ALT_Mean.head(1)]
    xl = [i for i in df.LATP.head(1)]
    yl = [i for i in df.LONP.head(1)]
    z = abs(df.ALT_Mean - zl[0])
    x = abs(df.LATP - xl[0])
    y = abs(df.LONP - yl[0])
    R, PHI, THETA = appendSpherical_np(x.as_matrix(),y.as_matrix(), z.as_matrix())
    THETA1 = THETA[-1]
    THETA2 = THETA[-300]
    R1 = R[-1]
    R2 = R[-300]
    dist_10_1 = np.sqrt(R1**2 + R2**2 - 2*R1*R2*np.cos(THETA1-THETA2))
    beta = np.arcsin(R1*np.sin(THETA1-THETA2)/dist_10_1)
    THETA  = THETA - (beta+THETA2)#(np.pi-beta)
    #last_R = R[-1]
    #R = R/last_R
    X = R * np.sin(PHI) * np.cos(THETA)
    Y = R * np.sin(PHI) * np.sin(THETA)
    Z = R * np.cos(PHI)
    return X, Y, Z
    
#fig, ax = plt.subplots(figsize=(16,12))
#ax = fig.gca(projection='3d')

#for key, grp in frame.groupby(['Flight_instance_ID']):
#    plot_traj(grp)
    
#ax.legend()
#plt.show()
In [ ]:
import plotly.plotly as py
import plotly.graph_objs as go
import pandas as pd
import numpy as np
from plotly.graph_objs import *

layout = dict(
    width=800,
    height=700,
    autosize=True,
    title='Climb Trajectories',
    showlegend=False,
    scene=dict(
        xaxis=dict(
            gridcolor='rgb(255, 255, 255)',
            zerolinecolor='rgb(255, 255, 255)',
            showbackground=True,
            backgroundcolor='rgb(230, 230,230)'
        ),
        yaxis=dict(
            gridcolor='rgb(255, 255, 255)',
            zerolinecolor='rgb(255, 255, 255)',
            showbackground=True,
            backgroundcolor='rgb(230, 230,230)'
        ),
        zaxis=dict(
            gridcolor='rgb(255, 255, 255)',
            zerolinecolor='rgb(255, 255, 255)',
            showbackground=True,
            backgroundcolor='rgb(230, 230,230)'
        ),
        camera=dict(
            up=dict(
                x=0,
                y=0,
                z=1
            ),
            eye=dict(
                x=-1.7428,
                y=1.0707,
                z=0.7100,
            )
        ),
        aspectratio = dict( x=1, y=1, z=0.7 ),
        aspectmode = 'manual'
    ),
)

xl = np.zeros(1)
yl = np.zeros(1)
zl = np.zeros(1)
tracelist = []
for key, grp in frame.groupby(['Flight_instance_ID']):
    xi,yi,zi = plot_traj(grp)
    xs = pd.Series(xi)
    ys = pd.Series(yi)
    zs = pd.Series(zi)
    trace = go.Scatter3d(
        x=xs, y=ys, z=zs*0.3048,
        marker=dict(
            size=2,
            color=-grp.FF,
            colorscale='Viridis',
        ),
        line=dict(
            color='#1f77b4',
            width=0
        )
    )
    tracelist.append(trace)

data = Data(tracelist)

fig = dict(data=data, layout=layout)
#layout = go.Layout(showlegend=False)
py.iplot(fig, filename='Flights_Trajectories', height=700, validate=False)
In [44]:
from IPython.display import IFrame
IFrame('Flights_Trajectories.html', width=970, height=800)
Out[44]:
In [8]:
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
Out[8]: