From 0745e93cae2f47913ab3add99921f0681812264b Mon Sep 17 00:00:00 2001 From: Guoxuan Xu Date: Mon, 28 Oct 2024 15:27:09 -0700 Subject: [PATCH] aggregating candidate info --- impact_evaluation/eda_delay_analysis.ipynb | 88 ++++ .../eval_finding_top10_RRcandidate.ipynb | 479 ------------------ .../eval_finding_top10_candidate.ipynb | 170 +++++++ .../finding_cost_revenue.cpython-38.pyc | Bin 3184 -> 3382 bytes .../finding_roundtripFlights.cpython-38.pyc | Bin 3263 -> 3263 bytes 5 files changed, 258 insertions(+), 479 deletions(-) create mode 100644 impact_evaluation/eda_delay_analysis.ipynb delete mode 100644 impact_evaluation/eval_finding_top10_RRcandidate.ipynb create mode 100644 impact_evaluation/eval_finding_top10_candidate.ipynb diff --git a/impact_evaluation/eda_delay_analysis.ipynb b/impact_evaluation/eda_delay_analysis.ipynb new file mode 100644 index 0000000..03668d4 --- /dev/null +++ b/impact_evaluation/eda_delay_analysis.ipynb @@ -0,0 +1,88 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# import data\n", + "\n", + "candidates_rt = pd.read_csv('../data/temproary_data/candidate_roundTrip_route.csv')\n", + "candidates_rt['round_trip_route_IATA'] = candidates_rt['round_trip_route_IATA'].apply(eval)\n", + "roundTrips = pd.read_csv('../data/temproary_data/round_trip_flights.csv')\n", + "airports_info = pd.read_csv('../data/cleaned_data/Airport_Codes.csv')\n", + "tickets_info = pd.read_csv('../data/cleaned_data/Tickets.csv')\n", + "\n", + "tickets_info = tickets_info.assign(\n", + " sorted_route=tickets_info.apply(\n", + " lambda x: tuple(\n", + " sorted([x[\"ORIGIN_AIRPORT_IATA_CODE\"], x[\"DEST_AIRPORT_IATA_CODE\"]])\n", + " ),\n", + " axis=1,\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# gather relevent data\n", + "\n", + "airports_can = {i for pair in candidates_rt['round_trip_route_IATA'].values for i in pair}\n", + "\n", + "candidate_roundTrips = roundTrips[roundTrips['round_trip_route_IATA'].isin(candidates_rt['round_trip_route_IATA'])]\n", + "candidate_airports = airports_info[airports_info['AIRPORT_IATA_CODE'].apply(lambda x: x in airports_can)]\n", + "candidate_tickets = tickets_info[tickets_info['sorted_route'].isin(candidates_rt['round_trip_route_IATA'])]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Delay analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tongConsultinInc", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.19" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/impact_evaluation/eval_finding_top10_RRcandidate.ipynb b/impact_evaluation/eval_finding_top10_RRcandidate.ipynb deleted file mode 100644 index 44b34af..0000000 --- a/impact_evaluation/eval_finding_top10_RRcandidate.ipynb +++ /dev/null @@ -1,479 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Main statistical analysis\n", - "- narrow candidate to 10 candidate using four metrics(total profit, weekly profit, monthly profit, flights frequency)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 199, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "\n", - "roundTrip_profit_g = pd.read_csv('../data/temproary_data/roundTrip_profit.csv')\n", - "roundTrip_profit_g['round_trip_route_IATA'] = roundTrip_profit_g['round_trip_route_IATA'].apply(lambda x: eval(x))\n", - "roundTrip_profit_g['inbound_FL_DATE'] = pd.to_datetime(roundTrip_profit_g['inbound_FL_DATE'])\n", - "roundTrip_profit_g['outbound_FL_DATE'] = pd.to_datetime(roundTrip_profit_g['outbound_FL_DATE'])\n", - "\n", - "roundTrip_fre_g = pd.read_csv('../data/temproary_data/round_trip_flights.csv')\n", - "roundTrip_fre_g['round_trip_route_IATA'] = roundTrip_fre_g['round_trip_route_IATA'].apply(lambda x: eval(x))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Aggregrating all the factors and assign ranks to each factors for round-trip route\n", - "consider factors:\n", - "- flight frequency\n", - "- flight profit (total profit - total cost)\n", - "- TODO: weekly profit" - ] - }, - { - "cell_type": "code", - "execution_count": 200, - "metadata": {}, - "outputs": [], - "source": [ - "# calculate the ranking of the profit\n", - "roundTrip_profit = (roundTrip_profit_g.groupby('round_trip_route_IATA')['profit']\n", - " .sum()\n", - " .reset_index()\n", - " .rename(columns={'profit':'round_trip_profit'}))\n", - "roundTrip_profit = (roundTrip_profit.assign(round_trip_profit_ranking = roundTrip_profit['round_trip_profit']\n", - " .rank(ascending = False, method = 'min')))\n", - "\n", - "\n", - "# calculate the ranking of the frequency\n", - "roundTrip_fre = (roundTrip_fre_g.groupby('round_trip_route_IATA')['outbound_OCCUPANCY_RATE']\n", - " .count().sort_values(ascending=False)\n", - " .reset_index()\n", - " .rename(columns={'outbound_OCCUPANCY_RATE':'round_trip_flights_count'}))\n", - "roundTrip_fre = roundTrip_fre[roundTrip_fre['round_trip_route_IATA'].isin(roundTrip_profit['round_trip_route_IATA'])]\n", - "roundTrip_fre = (roundTrip_fre.assign(round_trip_flights_count_ranking = roundTrip_fre['round_trip_flights_count']\n", - " .rank(ascending = False, method = 'min')))\n", - "\n", - "# calculate the monthly average profit\n", - "roundTrip_profit_m = (roundTrip_profit_g.assign(month = np.minimum(roundTrip_profit_g['inbound_FL_DATE'], roundTrip_profit_g['outbound_FL_DATE']).dt.month)\n", - " .groupby('round_trip_route_IATA')\n", - " .apply(lambda df: df.groupby('month')['profit'].sum().mean())\n", - " .reset_index()\n", - " .rename(columns={0:'monthly_avg_profit'}))\n", - "roundTrip_profit_m = roundTrip_profit_m[roundTrip_profit_m['round_trip_route_IATA'].isin(roundTrip_profit['round_trip_route_IATA'])]\n", - "roundTrip_profit_m = (roundTrip_profit_m.assign(monthly_avg_profit_ranking = roundTrip_profit_m['monthly_avg_profit']\n", - " .rank(ascending = False, method = 'min')))\n", - "\n", - "# calculate the weekly average profit\n", - "roundTrip_profit_w = (roundTrip_profit_g.assign(week = np.minimum(roundTrip_profit_g['inbound_FL_DATE'], roundTrip_profit_g['outbound_FL_DATE']).dt.isocalendar().week)\n", - " .groupby('round_trip_route_IATA')\n", - " .apply(lambda df: df.groupby('week')['profit'].sum().mean())\n", - " .reset_index()\n", - " .rename(columns={0:'weekly_avg_profit'}))\n", - "roundTrip_profit_w = roundTrip_profit_w[roundTrip_profit_w['round_trip_route_IATA'].isin(roundTrip_profit['round_trip_route_IATA'])]\n", - "roundTrip_profit_w = (roundTrip_profit_w.assign(weekly_avg_profit_ranking = roundTrip_profit_w['weekly_avg_profit']\n", - " .rank(ascending = False, method = 'min')))\n", - "\n", - "# calculate the ranking of the operation career\n", - "roundTrip_op = (roundTrip_profit_g\n", - " .groupby('round_trip_route_IATA')\n", - " .apply(lambda df: len(set(df['inbound_OP_CARRIER']).union(set(df['outbound_OP_CARRIER']))))\n", - " .reset_index()\n", - " .rename(columns={0:'round_trip_op_count'}))\n", - "\n", - "# calcualte the proprotion of the delay flights for each round trip route\n", - "inbound_trips = roundTrip_profit_g[['inbound_DEP_DELAY', 'inbound_ARR_DELAY', 'round_trip_route_IATA']]\n", - "outbound_trips = roundTrip_profit_g[['outbound_DEP_DELAY', 'outbound_ARR_DELAY', 'round_trip_route_IATA']]\n", - "\n", - "all_flights = pd.concat(\n", - " [inbound_trips.rename(columns={'inbound_DEP_DELAY':'DEP_DELAY', 'inbound_ARR_DELAY':'ARR_DELAY'}, inplace= False),\n", - " outbound_trips.rename(columns={'outbound_DEP_DELAY':'DEP_DELAY', 'outbound_ARR_DELAY':'ARR_DELAY'}, inplace= False)],\n", - " axis=0\n", - ")\n", - "all_flights = all_flights[all_flights['round_trip_route_IATA'].isin(roundTrip_profit['round_trip_route_IATA'])]\n", - "\n", - "all_flights['is_dep_delay'] = all_flights['DEP_DELAY'].apply(lambda val : True if val >= 15 else False if val <= -15 else 0)\n", - "all_flights['is_arr_delay'] = all_flights['ARR_DELAY'].apply(lambda val : True if val >= 15 else False if val <= -15 else 0)\n", - "\n", - "roundTrip_dep_delay_rate = all_flights.groupby('round_trip_route_IATA')['is_dep_delay'].mean().sort_values(ascending=True).reset_index().rename(columns={'is_dep_delay':'dep_delay_rate'})\n", - "roundTrip_dep_delay_rate = (roundTrip_dep_delay_rate.assign(dep_delay_rate_ranking = roundTrip_dep_delay_rate['dep_delay_rate'].rank(ascending = True, method = 'min')))\n", - "\n", - "roundTrip_arr_delay_rate = all_flights.groupby('round_trip_route_IATA')['is_arr_delay'].mean().sort_values(ascending=True).reset_index().rename(columns={'is_arr_delay':'arr_delay_rate'})\n", - "roundTrip_arr_delay_rate = (roundTrip_arr_delay_rate.assign(arr_delay_rate_ranking = roundTrip_arr_delay_rate['arr_delay_rate'].rank(ascending = True, method = 'min')))\n", - "\n", - "# calcualate the average delay rate for each round trip route\n", - "\n", - "\n", - "\n", - "# # merge the ranks\n", - "round_trip_info = roundTrip_profit.merge(roundTrip_fre, on='round_trip_route_IATA', how='inner')\n", - "\n", - "round_trip_info = round_trip_info.merge(roundTrip_profit_m, on='round_trip_route_IATA', how='left')\n", - "\n", - "round_trip_info = round_trip_info.merge(roundTrip_profit_w, on='round_trip_route_IATA', how='left')\n", - "\n", - "round_trip_info = round_trip_info.merge(roundTrip_op, on='round_trip_route_IATA', how='left')\n", - "\n", - "round_trip_info = round_trip_info.merge(roundTrip_dep_delay_rate, on='round_trip_route_IATA', how='left')\n", - "\n", - "round_trip_info = round_trip_info.merge(roundTrip_arr_delay_rate, on='round_trip_route_IATA', how='left')\n", - "\n", - "\n", - "\n", - "\n", - "# # # aggregate the ranking\n", - "round_trip_info = round_trip_info.assign(avg_ranking = ((round_trip_info['round_trip_flights_count_ranking'] * 0.2 + \n", - " round_trip_info['round_trip_profit_ranking'] * 0.7 + \n", - " round_trip_info['monthly_avg_profit_ranking'] * 0.5 +\n", - " round_trip_info['weekly_avg_profit_ranking'] * 0.5) / 4))\n", - "candidate_roundTrip_route = round_trip_info.sort_values(by='avg_ranking', ascending=True).head(10)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 201, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
round_trip_route_IATAround_trip_profitround_trip_profit_rankinground_trip_flights_countround_trip_flights_count_rankingmonthly_avg_profitmonthly_avg_profit_rankingweekly_avg_profitweekly_avg_profit_rankinground_trip_op_countdep_delay_ratedep_delay_rate_rankingarr_delay_ratearr_delay_rate_rankingavg_ranking
1978(JFK, LAX)1.065010e+081.031404.03.550033e+071.08.192385e+061.040.153025924.00.1721341153.00.625
2004(JFK, SFO)4.670737e+078.0184221.01.556912e+078.03.592874e+068.040.2263842131.00.2565152277.04.450
1109(DCA, ORD)4.445894e+079.0176424.01.481965e+079.03.419918e+069.060.1890591576.00.2134351801.05.025
128(ATL, CLT)4.716123e+077.0153443.01.572041e+077.03.627787e+067.030.124185466.00.140482584.05.125
1555(EWR, SFO)5.828642e+073.0119994.01.942881e+073.04.483570e+063.020.2777312544.00.2973312536.05.975
1093(DCA, LGA)4.069849e+0712.0167435.01.356616e+0712.03.130653e+0612.020.2222222081.00.2965952533.06.850
1283(DFW, IAH)3.786681e+0715.0143257.01.262227e+0715.02.912832e+0615.070.1829611471.00.2461592178.09.225
2392(MSP, ORD)3.276681e+0725.0170531.01.092227e+0725.02.520524e+0625.090.2269792136.00.2480942199.012.175
1382(DSM, ORD)3.788485e+0714.0947161.01.262828e+0714.02.914219e+0614.080.2402322280.00.2935592519.014.000
831(CLT, GSP)5.746858e+074.0772253.01.915619e+074.04.420660e+064.010.139896703.00.123705363.014.350
\n", - "
" - ], - "text/plain": [ - " round_trip_route_IATA round_trip_profit round_trip_profit_ranking \\\n", - "1978 (JFK, LAX) 1.065010e+08 1.0 \n", - "2004 (JFK, SFO) 4.670737e+07 8.0 \n", - "1109 (DCA, ORD) 4.445894e+07 9.0 \n", - "128 (ATL, CLT) 4.716123e+07 7.0 \n", - "1555 (EWR, SFO) 5.828642e+07 3.0 \n", - "1093 (DCA, LGA) 4.069849e+07 12.0 \n", - "1283 (DFW, IAH) 3.786681e+07 15.0 \n", - "2392 (MSP, ORD) 3.276681e+07 25.0 \n", - "1382 (DSM, ORD) 3.788485e+07 14.0 \n", - "831 (CLT, GSP) 5.746858e+07 4.0 \n", - "\n", - " round_trip_flights_count round_trip_flights_count_ranking \\\n", - "1978 3140 4.0 \n", - "2004 1842 21.0 \n", - "1109 1764 24.0 \n", - "128 1534 43.0 \n", - "1555 1199 94.0 \n", - "1093 1674 35.0 \n", - "1283 1432 57.0 \n", - "2392 1705 31.0 \n", - "1382 947 161.0 \n", - "831 772 253.0 \n", - "\n", - " monthly_avg_profit monthly_avg_profit_ranking weekly_avg_profit \\\n", - "1978 3.550033e+07 1.0 8.192385e+06 \n", - "2004 1.556912e+07 8.0 3.592874e+06 \n", - "1109 1.481965e+07 9.0 3.419918e+06 \n", - "128 1.572041e+07 7.0 3.627787e+06 \n", - "1555 1.942881e+07 3.0 4.483570e+06 \n", - "1093 1.356616e+07 12.0 3.130653e+06 \n", - "1283 1.262227e+07 15.0 2.912832e+06 \n", - "2392 1.092227e+07 25.0 2.520524e+06 \n", - "1382 1.262828e+07 14.0 2.914219e+06 \n", - "831 1.915619e+07 4.0 4.420660e+06 \n", - "\n", - " weekly_avg_profit_ranking round_trip_op_count dep_delay_rate \\\n", - "1978 1.0 4 0.153025 \n", - "2004 8.0 4 0.226384 \n", - "1109 9.0 6 0.189059 \n", - "128 7.0 3 0.124185 \n", - "1555 3.0 2 0.277731 \n", - "1093 12.0 2 0.222222 \n", - "1283 15.0 7 0.182961 \n", - "2392 25.0 9 0.226979 \n", - "1382 14.0 8 0.240232 \n", - "831 4.0 1 0.139896 \n", - "\n", - " dep_delay_rate_ranking arr_delay_rate arr_delay_rate_ranking \\\n", - "1978 924.0 0.172134 1153.0 \n", - "2004 2131.0 0.256515 2277.0 \n", - "1109 1576.0 0.213435 1801.0 \n", - "128 466.0 0.140482 584.0 \n", - "1555 2544.0 0.297331 2536.0 \n", - "1093 2081.0 0.296595 2533.0 \n", - "1283 1471.0 0.246159 2178.0 \n", - "2392 2136.0 0.248094 2199.0 \n", - "1382 2280.0 0.293559 2519.0 \n", - "831 703.0 0.123705 363.0 \n", - "\n", - " avg_ranking \n", - "1978 0.625 \n", - "2004 4.450 \n", - "1109 5.025 \n", - "128 5.125 \n", - "1555 5.975 \n", - "1093 6.850 \n", - "1283 9.225 \n", - "2392 12.175 \n", - "1382 14.000 \n", - "831 14.350 " - ] - }, - "execution_count": 201, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "candidate_roundTrip_route" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "tongConsultinInc", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.19" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/impact_evaluation/eval_finding_top10_candidate.ipynb b/impact_evaluation/eval_finding_top10_candidate.ipynb new file mode 100644 index 0000000..ef02891 --- /dev/null +++ b/impact_evaluation/eval_finding_top10_candidate.ipynb @@ -0,0 +1,170 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Main statistical analysis\n", + "- narrow candidate to 10 candidate using four metrics(total profit, weekly profit, monthly profit, flights frequency)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "roundTrip_profit_g = pd.read_csv('../data/temproary_data/roundTrip_profit.csv')\n", + "roundTrip_profit_g['round_trip_route_IATA'] = roundTrip_profit_g['round_trip_route_IATA'].apply(lambda x: eval(x))\n", + "roundTrip_profit_g['inbound_FL_DATE'] = pd.to_datetime(roundTrip_profit_g['inbound_FL_DATE'])\n", + "roundTrip_profit_g['outbound_FL_DATE'] = pd.to_datetime(roundTrip_profit_g['outbound_FL_DATE'])\n", + "\n", + "roundTrip_fre_g = pd.read_csv('../data/temproary_data/round_trip_flights.csv')\n", + "roundTrip_fre_g['round_trip_route_IATA'] = roundTrip_fre_g['round_trip_route_IATA'].apply(lambda x: eval(x))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Aggregrating all the factors and assign ranks to each factors for round-trip route\n", + "consider factors:\n", + "- flight frequency\n", + "- flight profit (total profit - total cost)\n", + "- TODO: weekly profit" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# calculate the ranking of the profit\n", + "roundTrip_profit = (roundTrip_profit_g.groupby('round_trip_route_IATA')['profit']\n", + " .sum()\n", + " .reset_index()\n", + " .rename(columns={'profit':'round_trip_profit'}))\n", + "roundTrip_profit = (roundTrip_profit.assign(round_trip_profit_ranking = roundTrip_profit['round_trip_profit']\n", + " .rank(ascending = False, method = 'min')))\n", + "\n", + "\n", + "# calculate the ranking of the frequency\n", + "roundTrip_fre = (roundTrip_fre_g.groupby('round_trip_route_IATA')['outbound_OCCUPANCY_RATE']\n", + " .count().sort_values(ascending=False)\n", + " .reset_index()\n", + " .rename(columns={'outbound_OCCUPANCY_RATE':'round_trip_flights_count'}))\n", + "roundTrip_fre = roundTrip_fre[roundTrip_fre['round_trip_route_IATA'].isin(roundTrip_profit['round_trip_route_IATA'])]\n", + "roundTrip_fre = (roundTrip_fre.assign(round_trip_flights_count_ranking = roundTrip_fre['round_trip_flights_count']\n", + " .rank(ascending = False, method = 'min')))\n", + "\n", + "# calculate the monthly average profit\n", + "roundTrip_profit_m = (roundTrip_profit_g.assign(month = np.minimum(roundTrip_profit_g['inbound_FL_DATE'], roundTrip_profit_g['outbound_FL_DATE']).dt.month)\n", + " .groupby('round_trip_route_IATA')\n", + " .apply(lambda df: df.groupby('month')['profit'].sum().mean())\n", + " .reset_index()\n", + " .rename(columns={0:'monthly_avg_profit'}))\n", + "roundTrip_profit_m = roundTrip_profit_m[roundTrip_profit_m['round_trip_route_IATA'].isin(roundTrip_profit['round_trip_route_IATA'])]\n", + "roundTrip_profit_m = (roundTrip_profit_m.assign(monthly_avg_profit_ranking = roundTrip_profit_m['monthly_avg_profit']\n", + " .rank(ascending = False, method = 'min')))\n", + "\n", + "# calculate the weekly average profit\n", + "roundTrip_profit_w = (roundTrip_profit_g.assign(week = np.minimum(roundTrip_profit_g['inbound_FL_DATE'], roundTrip_profit_g['outbound_FL_DATE']).dt.isocalendar().week)\n", + " .groupby('round_trip_route_IATA')\n", + " .apply(lambda df: df.groupby('week')['profit'].sum().mean())\n", + " .reset_index()\n", + " .rename(columns={0:'weekly_avg_profit'}))\n", + "roundTrip_profit_w = roundTrip_profit_w[roundTrip_profit_w['round_trip_route_IATA'].isin(roundTrip_profit['round_trip_route_IATA'])]\n", + "roundTrip_profit_w = (roundTrip_profit_w.assign(weekly_avg_profit_ranking = roundTrip_profit_w['weekly_avg_profit']\n", + " .rank(ascending = False, method = 'min')))\n", + "\n", + "# calculate the ranking of the operation career\n", + "roundTrip_op = (roundTrip_profit_g\n", + " .groupby('round_trip_route_IATA')\n", + " .apply(lambda df: len(set(df['inbound_OP_CARRIER']).union(set(df['outbound_OP_CARRIER']))))\n", + " .reset_index()\n", + " .rename(columns={0:'round_trip_op_count'}))\n", + "\n", + "# calcualte the proprotion of the delay flights for each round trip route\n", + "inbound_trips = roundTrip_profit_g[['inbound_DEP_DELAY', 'inbound_ARR_DELAY', 'round_trip_route_IATA']]\n", + "outbound_trips = roundTrip_profit_g[['outbound_DEP_DELAY', 'outbound_ARR_DELAY', 'round_trip_route_IATA']]\n", + "\n", + "all_flights = pd.concat(\n", + " [inbound_trips.rename(columns={'inbound_DEP_DELAY':'DEP_DELAY', 'inbound_ARR_DELAY':'ARR_DELAY'}, inplace= False),\n", + " outbound_trips.rename(columns={'outbound_DEP_DELAY':'DEP_DELAY', 'outbound_ARR_DELAY':'ARR_DELAY'}, inplace= False)],\n", + " axis=0\n", + ")\n", + "all_flights = all_flights[all_flights['round_trip_route_IATA'].isin(roundTrip_profit['round_trip_route_IATA'])]\n", + "\n", + "all_flights['is_dep_delay'] = all_flights['DEP_DELAY'].apply(lambda val : True if val >= 15 else False if val <= -15 else 0)\n", + "all_flights['is_arr_delay'] = all_flights['ARR_DELAY'].apply(lambda val : True if val >= 15 else False if val <= -15 else 0)\n", + "\n", + "roundTrip_dep_delay_rate = all_flights.groupby('round_trip_route_IATA')['is_dep_delay'].mean().sort_values(ascending=True).reset_index().rename(columns={'is_dep_delay':'dep_delay_rate'})\n", + "roundTrip_dep_delay_rate = (roundTrip_dep_delay_rate.assign(dep_delay_rate_ranking = roundTrip_dep_delay_rate['dep_delay_rate'].rank(ascending = True, method = 'min')))\n", + "\n", + "roundTrip_arr_delay_rate = all_flights.groupby('round_trip_route_IATA')['is_arr_delay'].mean().sort_values(ascending=True).reset_index().rename(columns={'is_arr_delay':'arr_delay_rate'})\n", + "roundTrip_arr_delay_rate = (roundTrip_arr_delay_rate.assign(arr_delay_rate_ranking = roundTrip_arr_delay_rate['arr_delay_rate'].rank(ascending = True, method = 'min')))\n", + "\n", + "# calcualate the average delay rate for each round trip route\n", + "\n", + "\n", + "\n", + "# # merge the ranks\n", + "round_trip_info = roundTrip_profit.merge(roundTrip_fre, on='round_trip_route_IATA', how='inner')\n", + "\n", + "round_trip_info = round_trip_info.merge(roundTrip_profit_m, on='round_trip_route_IATA', how='left')\n", + "\n", + "round_trip_info = round_trip_info.merge(roundTrip_profit_w, on='round_trip_route_IATA', how='left')\n", + "\n", + "round_trip_info = round_trip_info.merge(roundTrip_op, on='round_trip_route_IATA', how='left')\n", + "\n", + "round_trip_info = round_trip_info.merge(roundTrip_dep_delay_rate, on='round_trip_route_IATA', how='left')\n", + "\n", + "round_trip_info = round_trip_info.merge(roundTrip_arr_delay_rate, on='round_trip_route_IATA', how='left')\n", + "\n", + "\n", + "\n", + "\n", + "# # # aggregate the ranking\n", + "round_trip_info = round_trip_info.assign(avg_ranking = ((round_trip_info['round_trip_flights_count_ranking'] * 0.2 + \n", + " round_trip_info['round_trip_profit_ranking'] * 0.7 + \n", + " round_trip_info['monthly_avg_profit_ranking'] * 0.5 +\n", + " round_trip_info['weekly_avg_profit_ranking'] * 0.5) / 4))\n", + "candidate_roundTrip_route = round_trip_info.sort_values(by='avg_ranking', ascending=True).head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "candidate_roundTrip_route['round_trip_route_IATA'].to_csv('../data/temproary_data/candidate_roundTrip_route.csv', index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tongConsultinInc", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.19" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/__pycache__/finding_cost_revenue.cpython-38.pyc b/src/__pycache__/finding_cost_revenue.cpython-38.pyc index 2a6afbc426b24d2b5552967aae70626416f8b15a..9dd7913ce9a5d2e06ec7496d8e0b1ab9d44e2f53 100644 GIT binary patch delta 1605 zcmZ{k&rjQC7{}k&u^lH)LJBE~X(-T)Qd1~}778tNGK-s4g9HU=w_LJpuZhg^1KDQk zCQvRr?2-d|FV)t=I98f=nY4dlyKjfdr&Zb|mv!1PU+Sr63VY5($<7?!QsW)SoHyeW2$^3FAk_JZjXM%U_1Wi%?{d%ndb^W> zP#@qu29y2#{*0iFY-zsPIHSGiU zymu?VQ`p=pZ|KGH`u+U&mabwe$ox-{CDT#d7+^a=HtYh4;!BYW|6@hz*LWFA^jB=Q zoIrv-kw^+UQ9-8o(^yx96!eYYh{u>Y!#*whbfl+Av@IR7oVy%zV~BcXLdTfI+xReW z5ZJ{B963k&tDMq8WPr&4Nwj&=@0riDnlket|4NF%ZuEaryq+O>l0on?@CQN?kraD$ zZ&L~xA{R)y$}6a~8zjTzVjE62LPp6Lxdh=}lw{i6>9V&JsMNTEl5oEacQWBidxcd_ z@+zlx6`Ay{zUr->Qjq7E_D0v(`4foDylNxY6UYhmY`qbg^||J}s3D#>X^gjPH9TNt=UlBB%k)esOiyv8_x`;64O3SrM z{UcJj`;Z=s#eUZANDWwnU`hfK^m%ZWE(Eg~==L+Kt#KABv&-ff5T*rDRXfn&UE`aU z7~r_fGS6iuK~4eQWUkY!G(LL)EW>~iz$jpbLB%d#d1TsF8X^|~V}MKaD``fa19<~* zo&F*<;`1OCzye^17DF0+99oRug5eTi5pbLS7MgF}0lCb;9ujq$PNi11j8BcmzG1Dv z)J<=yB<@+w9n(?6E^pdq!xid=RW)3(V%ui5;l>La{(tqF?hY0n=*4$+Eqf*|RQC32 zpIK3s)(zPEt93WJuDxI0&^9;qd`>UA!>d{@r{(l=QGcN4w{`DDaXPJvRt&ZUZzL-Y zn4s9JxBkLQ>|&()HQ$(&;YA|dw>uC-so+^nB}bJQ)GhGYLN2+IK_4_W78FSLRAnxBGF#mcQEC}slKdtYM2&dWTH73@bx(wd(KP27m3E@Lq^Nxi`hX zv>mP(ijl~|6rb-&th~0F@`vWI4|JQQ1kDU8_Y#z6fus-@juNudk)PD zh*>^t)zS-SW)K$Vyk4c-t?>6Ech;{ zwE}hpL-R7UrSG%)PPyqBp;m5`Yhi3JV5K0`{YIl)60X0$U%ksLksPXx)?S$WyztHT z7lmSRWApmPSDw!BW5jGk0)g*gYjCH`N9`#)@>uu{+tmj|utE5)J(rJDyiBV6r>Jl6 zx$f|%_89-mW_pUI{?&C}NlfS;i*hf)JBe)N9PTxPC