-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
201 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,201 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Competitor Analysis" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 49, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import ast\n", | ||
"\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"import numpy as np\n", | ||
"import pandas as pd" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"/opt/anaconda3/envs/tongConsultinInc/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3508: DtypeWarning: Columns (3,13,14) have mixed types.Specify dtype option on import or set low_memory=False.\n", | ||
" exec(code_obj, self.user_global_ns, self.user_ns)\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Load the intermediate and cleaned datasets used by this analysis.\n", | ||
"# The route columns are stored in the CSVs as text (presumably stringified tuples of two\n", | ||
"# IATA codes -- confirm against the notebook that writes them). ast.literal_eval parses\n", | ||
"# literals only, whereas eval would execute any expression found in the file.\n", | ||
"\n", | ||
"candidates_rt = pd.read_csv('../data/temproary_data/candidate_roundTrip_route.csv')\n", | ||
"candidates_rt['round_trip_route_IATA'] = candidates_rt['round_trip_route_IATA'].apply(ast.literal_eval)\n", | ||
"roundTrips = pd.read_csv('../data/temproary_data/round_trip_flights.csv')\n", | ||
"roundTrips['round_trip_route_IATA'] = roundTrips['round_trip_route_IATA'].apply(ast.literal_eval)\n", | ||
"roundTrips['inbound_FL_DATE'] = pd.to_datetime(roundTrips['inbound_FL_DATE'])\n", | ||
"roundTrips['outbound_FL_DATE'] = pd.to_datetime(roundTrips['outbound_FL_DATE'])\n", | ||
"airports_info = pd.read_csv('../data/cleaned_data/Airport_Codes.csv')\n", | ||
"tickets_info = pd.read_csv('../data/cleaned_data/Tickets.csv')\n", | ||
"\n", | ||
"# Direction-independent route key: the (origin, destination) codes sorted, as a tuple.\n", | ||
"# Built by zipping the two columns rather than a row-wise DataFrame.apply, which\n", | ||
"# constructs a Series for every row.\n", | ||
"tickets_info = tickets_info.assign(\n", | ||
"    sorted_route=[\n", | ||
"        tuple(sorted(pair))\n", | ||
"        for pair in zip(\n", | ||
"            tickets_info[\"ORIGIN_AIRPORT_IATA_CODE\"],\n", | ||
"            tickets_info[\"DEST_AIRPORT_IATA_CODE\"],\n", | ||
"        )\n", | ||
"    ]\n", | ||
")\n", | ||
"\n", | ||
"round_trip_profit = pd.read_csv('../data/temproary_data/roundTrip_profit.csv')\n", | ||
"round_trip_profit['round_trip_route_IATA'] = round_trip_profit['round_trip_route_IATA'].apply(ast.literal_eval)\n", | ||
"avg_ticket_price = pd.read_csv('../data/temproary_data/average_ticket_price.csv')\n", | ||
"all_flights = pd.read_csv('../data/original_data/Flights.csv')\n", | ||
"\n", | ||
"\n", | ||
"# Gather the rows relevant to the candidate round-trip routes.\n", | ||
"\n", | ||
"# Every airport code that appears in at least one candidate route.\n", | ||
"airports_can = {i for pair in candidates_rt['round_trip_route_IATA'].values for i in pair}\n", | ||
"\n", | ||
"candidate_roundTrips = roundTrips[roundTrips['round_trip_route_IATA'].isin(candidates_rt['round_trip_route_IATA'])]\n", | ||
"candidate_airports = airports_info[airports_info['AIRPORT_IATA_CODE'].isin(airports_can)]\n", | ||
"candidate_tickets = tickets_info[tickets_info['sorted_route'].isin(candidates_rt['round_trip_route_IATA'])]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Competitors associated with each candidate round-trip route" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 53, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# For every candidate round-trip route, collect the set of operating carriers seen on\n", | ||
"# its inbound leg, then count how many distinct carriers that set holds.\n", | ||
"inbound_carriers_by_route = candidate_roundTrips.groupby('round_trip_route_IATA')['inbound_OP_CARRIER']\n", | ||
"roundTrip_op = inbound_carriers_by_route.apply(\n", | ||
"    lambda carriers: set(carriers.value_counts().index)\n", | ||
").reset_index()\n", | ||
"roundTrip_op['op_count'] = roundTrip_op['inbound_OP_CARRIER'].map(len)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 61, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Flatten the per-route carrier sets into a single de-duplicated OP_CARRIER column.\n", | ||
"unique_carriers = {\n", | ||
"    carrier\n", | ||
"    for carrier_set in roundTrip_op['inbound_OP_CARRIER'].values\n", | ||
"    for carrier in carrier_set\n", | ||
"}\n", | ||
"operating_carrier = pd.Series(list(unique_carriers), name='OP_CARRIER').to_frame()\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Proportion of cancelled flights for each operating carrier" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 100, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Direction-independent route key for every flight: the (ORIGIN, DESTINATION) codes\n", | ||
"# sorted, as a tuple, so both directions of a route share one key.\n", | ||
"# Built by zipping the two columns instead of a row-wise DataFrame.apply, which\n", | ||
"# constructs a Series for every row of the flights table.\n", | ||
"all_flights['round_trip'] = [\n", | ||
"    tuple(sorted(pair))\n", | ||
"    for pair in zip(all_flights['ORIGIN'], all_flights['DESTINATION'])\n", | ||
"]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 116, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Mean of CANCELLED per (carrier, route), restricted to flights on a candidate route\n", | ||
"# that are operated by one of the carriers found on those routes.\n", | ||
"# NOTE(review): the mean is a cancellation proportion only if CANCELLED is a 0/1 flag -- confirm.\n", | ||
"on_candidate_route = all_flights['round_trip'].isin(candidates_rt['round_trip_route_IATA'])\n", | ||
"by_candidate_carrier = all_flights['OP_CARRIER'].isin(operating_carrier['OP_CARRIER'])\n", | ||
"op_prop_cancel = (\n", | ||
"    all_flights[on_candidate_route & by_candidate_carrier]\n", | ||
"    .groupby(['OP_CARRIER', 'round_trip'])['CANCELLED']\n", | ||
"    .mean()\n", | ||
"    .reset_index()\n", | ||
"    .rename(columns={'CANCELLED': 'prop_cancel'})\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 124, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Write the per-carrier, per-route cancellation proportions to CSV, omitting the row index.\n", | ||
"op_prop_cancel.to_csv('../data/temproary_data/op_prop_cancel.csv', index=False)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Average delay associated with each operating carrier" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 118, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Stack the inbound and outbound legs of every candidate round trip into one long frame\n", | ||
"# with shared DEP_DELAY / ARR_DELAY / OP_CARRIER columns plus the route key.\n", | ||
"# Note: this rebinds `all_flights`, replacing the frame read from Flights.csv in the\n", | ||
"# data-loading cell.\n", | ||
"inbound_legs = candidate_roundTrips[\n", | ||
"    ['inbound_DEP_DELAY', 'inbound_ARR_DELAY', 'inbound_OP_CARRIER', 'round_trip_route_IATA']\n", | ||
"].rename(\n", | ||
"    columns={'inbound_DEP_DELAY': 'DEP_DELAY', 'inbound_ARR_DELAY': 'ARR_DELAY', 'inbound_OP_CARRIER': 'OP_CARRIER'}\n", | ||
")\n", | ||
"outbound_legs = candidate_roundTrips[\n", | ||
"    ['outbound_DEP_DELAY', 'outbound_ARR_DELAY', 'outbound_OP_CARRIER', 'round_trip_route_IATA']\n", | ||
"].rename(\n", | ||
"    columns={'outbound_DEP_DELAY': 'DEP_DELAY', 'outbound_ARR_DELAY': 'ARR_DELAY', 'outbound_OP_CARRIER': 'OP_CARRIER'}\n", | ||
")\n", | ||
"all_flights = pd.concat([inbound_legs, outbound_legs], axis=0, ignore_index=True)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 123, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Mean departure and arrival delay per (carrier, route), written to CSV without the row index.\n", | ||
"# The two delay columns are selected with a list ([[...]]): indexing a groupby with\n", | ||
"# several bare keys (an implicit tuple) is deprecated and is rejected by pandas 2.x.\n", | ||
"op_average_delay = (\n", | ||
"    all_flights.groupby(['OP_CARRIER', 'round_trip_route_IATA'])[['DEP_DELAY', 'ARR_DELAY']]\n", | ||
"    .mean()\n", | ||
"    .reset_index()\n", | ||
")\n", | ||
"op_average_delay.to_csv('../data/temproary_data/op_average_delay.csv', index=False)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "tongConsultinInc", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.19" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |