Predict fuel flow rate of airplanes

In this project I am going to predict the fuel flow rate of airplanes during different phases of flight.

There are 8 different phases of flight in the dataset.

In [1]:
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
matplotlib.style.use('fivethirtyeight')
import zipfile
import pandas as pd
from datetime import datetime
from babel.dates import format_timedelta
import warnings
warnings.filterwarnings('ignore')

# Load flight data from the training archive into a single DataFrame.
# Both the archive and each member handle are opened in `with` blocks so they
# are closed as soon as the CSV has been parsed.
with zipfile.ZipFile('CAX_Train_1.zip') as z:
    allFiles = z.namelist()
    # Only the slice [1:2] of the archive listing (i.e. its second member) is
    # read; widen the slice to load more members.
    df_from_each_file = []
    for f in allFiles[1:2]:
        with z.open(f) as member:
            df_from_each_file.append(pd.read_csv(member, index_col=False))
frame = pd.concat(df_from_each_file, ignore_index=True)

def to_dattim(y, m, d, h, mi, s):
    """Build a ``pd.Timestamp`` from separate date/time components.

    Each component (year, month, day, hour, minute, second) is coerced with
    ``int()`` first, so values such as ``2017.0`` or ``'6'`` are accepted.
    """
    parts = [int(value) for value in (y, m, d, h, mi, s)]
    return pd.Timestamp(*parts)

# Snapshot the column index before any derived column is added.
a = frame.columns


def _row_timestamp(row):
    """Combine one record's date/time columns into a single timestamp."""
    return to_dattim(
        row['Year'], row['Month'], row['Day'],
        row['Hour'], row['Minute'], row['Second'],
    )


# Absolute timestamp of every record, built row by row.
frame["datetime"] = frame.apply(_row_timestamp, axis=1)

# First `datetime` entry of each flight, indexed by Flight_instance_ID.
temp = frame.groupby(['Flight_instance_ID'])['datetime'].first()

def minus_first(x,y,z):
    """Return ``y`` minus the reference value stored for key ``x`` in ``z``.

    x -- lookup key (the notebook passes a Flight_instance_ID).
    y -- value to offset (the notebook passes the record's timestamp).
    z -- mapping or Series indexed by ``x`` holding the reference values
         (the notebook passes the per-flight first timestamps).

    Raises KeyError if ``x`` is not present in ``z``.
    """
    # Look the reference up in the argument that was passed in rather than in
    # a module-level variable, so the function has no hidden dependency.
    return y - z[x]

# Turn each absolute timestamp into the time elapsed since the first record of
# the same flight. `temp` is indexed by Flight_instance_ID, so mapping the ID
# column through it aligns every row with its flight's first timestamp and the
# subtraction runs column-wise instead of once per row.
frame['datetime'] = frame['datetime'] - frame['Flight_instance_ID'].map(temp)
In [2]:
# Plot fuel flow (FF) against time of flight, one marker series per
# (flight, phase) group.

# Phase names, indexed by the integer code stored in the PH column.
lph = ['Unknown','Preflight','Taxi','Takeoff', 'Climb','Cruise','Approach','Rollout']
# Reference date added to the per-flight elapsed times so they can be drawn on
# a date axis; the x-axis is later clipped to start at this instant.
dat = pd.Timestamp('2017-1-1 00:00:00')
fig, ax = plt.subplots(figsize=(16, 12))
for (flight_id, ph), grp in frame.groupby(['Flight_instance_ID','PH']):
    # The phase code is part of the group key, so it is read from there, and
    # the shifted times go into a local Series instead of being written back
    # into the group slice.
    shifted = grp['datetime'] + dat
    # Phase codes below 7 use the default colour cycle; anything else is black.
    extra = {} if ph < 7 else {'color': 'black'}
    ax.plot(shifted, grp['FF'], 'o', lw = 2., label = lph[ph], **extra)

# Tick labels only have text after the figure has been drawn.
fig.canvas.draw()
labels = [item.get_text() for item in ax.get_xticklabels()]
# NOTE(review): dropping the first three characters presumably removes a
# leading day-of-month prefix from the auto-generated labels - confirm against
# the rendered figure for the matplotlib version in use.
labels = [i[3:] for i in labels]
# The first label is discarded before the labels are re-applied.
labels = labels[1:]
ax.set_xticklabels(labels, fontsize = 18)
labels = [item.get_text() for item in ax.get_yticklabels()]
ax.set_yticklabels(labels, fontsize = 18)
plt.legend(fontsize = 24)
plt.xticks(rotation=45)
plt.xlim(xmin = dat)
plt.ylabel('Fuel Flow Rate',fontsize = 20)
plt.xlabel('Time of Flight, [m]',fontsize = 20)

plt.show()

Each record has around 220 different parameters (columns).

In [3]:
# Report the number of columns in `a`, the column index captured before the
# derived `datetime` column was added. A single pre-formatted string prints the
# same text under both Python 2 and Python 3.
print("Number of columns: %d" % len(a))
Number of columns: 226

The dataset is 12 GB in size and contains 1000 flights.

In [4]:
import matplotlib.pyplot as plt
import matplotlib.cm
import pandas as pd 
from mpl_toolkits.basemap import Basemap
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
from matplotlib.colors import Normalize

# Draw one great-circle arc per flight on a Mercator map, from the flight's
# first recorded position to its last.
fig, ax = plt.subplots(figsize=(16,12))
# coordinates are taken from http://boundingbox.klokantech.com/

# Map corners: [lower-left lon, lower-left lat, upper-right lon, upper-right lat]
coorlist = [-133.15,24.29,-61.52,54.37]
rout_color = "orange"
bg_color = (0.0, 0.0, 0, 1.0)
coast_color = (204/255.0, 0, 153/255.0, 0.7)
fl_color = (204/255.0, 0, 153/255.0, 0.7)
m = Basemap(resolution='f', # c, l, i, h, f or None
            projection='merc',
            llcrnrlon=coorlist[0], llcrnrlat= coorlist[1], urcrnrlon=coorlist[2], urcrnrlat=coorlist[3])
m.drawcountries()
m.drawcoastlines(color=coast_color, linewidth=1.0)
m.fillcontinents(color=bg_color, lake_color=bg_color)
m.drawmapboundary(fill_color=bg_color)
path = pd.read_csv('ALL_data.csv', index_col=False)
resmap = path.groupby(['Flight_instance_ID'])

n_skipped = 0
for _, flight in resmap:
    lons = flight.LONP
    lats = flight.LATP
    # A route that falls outside the map box makes drawgreatcircle raise.
    # Drawing stays best-effort, but only ordinary errors are absorbed (so the
    # loop can still be interrupted) and the failures are counted.
    try:
        m.drawgreatcircle(lons.iloc[0], lats.iloc[0], lons.iloc[-1], lats.iloc[-1],
                          linewidth=0.5, color=rout_color, alpha = 0.1)
    except Exception:
        n_skipped += 1
if n_skipped:
    print("Skipped %d route(s) that could not be drawn" % n_skipped)
plt.show()
In [5]:
from IPython.display import HTML

# Emit an HTML/JavaScript snippet as this cell's output. The script defines
# code_toggle(), which hides every `div.input` element when `code_show` is true
# and shows them otherwise, then flips the flag. It is called once when the
# document is ready (so the inputs start hidden) and again each time the
# button in the form below is pressed.
# NOTE(review): relies on `$` being defined in the page - presumably jQuery
# supplied by the notebook front end; confirm for the viewer being targeted.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
Out[5]: