Skip to content

Commit

Permalink
updating competitor analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
g7xu committed Nov 2, 2024
1 parent e716994 commit 3d4080e
Showing 1 changed file with 201 additions and 0 deletions.
201 changes: 201 additions & 0 deletions impact_evaluation/eda_competitor_analysis.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Competitor Analysis"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"import ast\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/anaconda3/envs/tongConsultinInc/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3508: DtypeWarning: Columns (3,13,14) have mixed types.Specify dtype option on import or set low_memory=False.\n",
" exec(code_obj, self.user_global_ns, self.user_ns)\n"
]
}
],
"source": [
"# ---- Load data ----\n",
"# NOTE(review): one of the reads below emits a DtypeWarning (mixed types in\n",
"# columns 3, 13, 14); consider passing an explicit dtype= once the offending\n",
"# file has been identified.\n",
"\n",
"ROUTE_COL = 'round_trip_route_IATA'\n",
"\n",
"\n",
"def read_route_csv(path):\n",
"    \"\"\"Read a CSV and turn its route column from text back into Python literals.\n",
"\n",
"    ast.literal_eval only accepts literals (tuples, strings, numbers, ...), so\n",
"    unlike eval it cannot execute code embedded in a data file.\n",
"    \"\"\"\n",
"    frame = pd.read_csv(path)\n",
"    frame[ROUTE_COL] = frame[ROUTE_COL].apply(ast.literal_eval)\n",
"    return frame\n",
"\n",
"\n",
"candidates_rt = read_route_csv('../data/temproary_data/candidate_roundTrip_route.csv')\n",
"\n",
"roundTrips = read_route_csv('../data/temproary_data/round_trip_flights.csv')\n",
"for date_col in ('inbound_FL_DATE', 'outbound_FL_DATE'):\n",
"    roundTrips[date_col] = pd.to_datetime(roundTrips[date_col])\n",
"\n",
"airports_info = pd.read_csv('../data/cleaned_data/Airport_Codes.csv')\n",
"\n",
"# Direction-agnostic route key: (A, B) and (B, A) map to the same sorted tuple.\n",
"# Built with zip instead of a row-wise DataFrame.apply(axis=1), which constructs\n",
"# a Series per row and returns a DataFrame (not a Series) on an empty input.\n",
"tickets_info = pd.read_csv('../data/cleaned_data/Tickets.csv')\n",
"tickets_info = tickets_info.assign(\n",
"    sorted_route=[\n",
"        tuple(sorted(pair))\n",
"        for pair in zip(\n",
"            tickets_info['ORIGIN_AIRPORT_IATA_CODE'],\n",
"            tickets_info['DEST_AIRPORT_IATA_CODE'],\n",
"        )\n",
"    ]\n",
")\n",
"\n",
"round_trip_profit = read_route_csv('../data/temproary_data/roundTrip_profit.csv')\n",
"avg_ticket_price = pd.read_csv('../data/temproary_data/average_ticket_price.csv')\n",
"all_flights = pd.read_csv('../data/original_data/Flights.csv')\n",
"\n",
"\n",
"# ---- Restrict everything to the candidate round-trip routes ----\n",
"\n",
"candidate_routes = candidates_rt[ROUTE_COL]\n",
"# Every airport code that appears in at least one candidate route.\n",
"airports_can = {code for route in candidate_routes for code in route}\n",
"\n",
"candidate_roundTrips = roundTrips[roundTrips[ROUTE_COL].isin(candidate_routes)]\n",
"candidate_airports = airports_info[airports_info['AIRPORT_IATA_CODE'].isin(airports_can)]\n",
"candidate_tickets = tickets_info[tickets_info['sorted_route'].isin(candidate_routes)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Competitors associated with each candidate round-trip route"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"# For each candidate round-trip route, collect the set of operating carriers\n",
"# seen on the inbound leg, then count how many distinct carriers that is.\n",
"inbound_carriers_by_route = candidate_roundTrips.groupby('round_trip_route_IATA')['inbound_OP_CARRIER']\n",
"roundTrip_op = inbound_carriers_by_route.apply(\n",
"    lambda carriers: set(carriers.value_counts().index)\n",
").reset_index()\n",
"roundTrip_op['op_count'] = roundTrip_op['inbound_OP_CARRIER'].map(len)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [],
"source": [
"# Flatten the per-route carrier sets into a single table of distinct carriers.\n",
"# Sorted so the row order is reproducible; plain set iteration order can change\n",
"# between kernel sessions.\n",
"distinct_carriers = set().union(*roundTrip_op['inbound_OP_CARRIER'])\n",
"operating_carrier = pd.DataFrame({'OP_CARRIER': sorted(distinct_carriers)})\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Proportion of cancelled flights associated with each operating carrier"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [],
"source": [
"# Direction-agnostic route key for every flight: the sorted (ORIGIN, DESTINATION) pair.\n",
"# zip over the two columns avoids the per-row Series construction of\n",
"# DataFrame.apply(axis=1) and also works when the frame is empty.\n",
"all_flights['round_trip'] = [\n",
"    tuple(sorted(leg))\n",
"    for leg in zip(all_flights['ORIGIN'], all_flights['DESTINATION'])\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {},
"outputs": [],
"source": [
"# Cancellation rate (mean of CANCELLED) per carrier and route, restricted to\n",
"# the candidate routes and to the carriers found on them.\n",
"on_candidate_route = all_flights['round_trip'].isin(candidates_rt['round_trip_route_IATA'])\n",
"by_known_carrier = all_flights['OP_CARRIER'].isin(operating_carrier['OP_CARRIER'])\n",
"\n",
"op_prop_cancel = (\n",
"    all_flights[on_candidate_route & by_known_carrier]\n",
"    .groupby(['OP_CARRIER', 'round_trip'])['CANCELLED']\n",
"    .mean()\n",
"    .reset_index()\n",
"    .rename(columns={'CANCELLED': 'prop_cancel'})\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {},
"outputs": [],
"source": [
"# Persist the per-carrier, per-route cancellation rates without the row index.\n",
"cancel_csv_path = '../data/temproary_data/op_prop_cancel.csv'\n",
"op_prop_cancel.to_csv(cancel_csv_path, index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Average delay associated with each operating carrier"
]
},
{
"cell_type": "code",
"execution_count": 118,
"metadata": {},
"outputs": [],
"source": [
"# Stack the inbound and outbound legs of the candidate round trips into one\n",
"# long table with direction-free column names.\n",
"# NOTE(review): this rebinds `all_flights`, replacing the Flights.csv frame loaded\n",
"# earlier. The cells above that read all_flights['ORIGIN'] / ['CANCELLED'] will\n",
"# fail if re-run after this one; a distinct name would avoid that.\n",
"leg_frames = []\n",
"for direction in ('inbound', 'outbound'):\n",
"    leg_columns = {\n",
"        f'{direction}_DEP_DELAY': 'DEP_DELAY',\n",
"        f'{direction}_ARR_DELAY': 'ARR_DELAY',\n",
"        f'{direction}_OP_CARRIER': 'OP_CARRIER',\n",
"    }\n",
"    selected = candidate_roundTrips[list(leg_columns) + ['round_trip_route_IATA']]\n",
"    leg_frames.append(selected.rename(columns=leg_columns))\n",
"\n",
"all_flights = pd.concat(leg_frames, axis=0, ignore_index=True)"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/0y/gbwmzjp93k12t06yhk8_2p7h0000gn/T/ipykernel_34231/2552485323.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.\n",
" all_flights.groupby(['OP_CARRIER', 'round_trip_route_IATA'])['DEP_DELAY', 'ARR_DELAY'].mean().reset_index().to_csv('../data/temproary_data/op_average_delay.csv', index=False)\n"
]
}
],
"source": [
"# Mean departure and arrival delay per carrier and route, written out as CSV.\n",
"# The delay columns are selected with a list: indexing a groupby with a bare\n",
"# tuple of keys raises a FutureWarning here and is rejected by pandas 2.x.\n",
"op_average_delay = (\n",
"    all_flights\n",
"    .groupby(['OP_CARRIER', 'round_trip_route_IATA'])[['DEP_DELAY', 'ARR_DELAY']]\n",
"    .mean()\n",
"    .reset_index()\n",
")\n",
"op_average_delay.to_csv('../data/temproary_data/op_average_delay.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "tongConsultinInc",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.19"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 3d4080e

Please sign in to comment.