diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
new file mode 100644
index 0000000..6d0498e
--- /dev/null
+++ b/.github/workflows/ci.yaml
@@ -0,0 +1,42 @@
+name: CI
+
+# Run the test matrix on every push and on every pull request.
+on:
+  push:
+  pull_request:
+
+# The job only checks out and tests the code, so a read-only token is enough.
+permissions:
+  contents: read
+
+jobs:
+  run-tests:
+    name: Test
+    runs-on: ${{ matrix.os }}
+
+    strategy:
+      # Let every OS/interpreter combination finish even if one of them fails.
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
+        # Versions stay quoted so that "3.10" is not parsed as the float 3.1.
+        python-version:
+          - "3.7"
+          - "3.8"
+          - "3.9"
+          - "3.10"
+          - "3.11"
+          - "3.12-dev"
+          - "pypy-3.7"
+          - "pypy-3.8"
+          - "pypy-3.9"
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        # "python -m pip" ties both installs to the interpreter set up above.
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install -r requirements.txt
+
+      - name: Run tests
+        run: pytest
\ No newline at end of file
diff --git a/data_eda.ipynb b/data_eda.ipynb
new file mode 100644
index 0000000..3622756
--- /dev/null
+++ b/data_eda.ipynb
@@ -0,0 +1,1034 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 279,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import re"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 280,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/anaconda3/envs/tongConsultinInc/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3508: DtypeWarning: Columns (3,13,14) have mixed types.Specify dtype option on import or set low_memory=False.\n",
+ " exec(code_obj, self.user_global_ns, self.user_ns)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Load the data\n",
+ "routes = pd.read_csv('data/Flights.csv')\n",
+ "ticket_price = pd.read_csv('data/Tickets.csv')\n",
+ "airportsInfo = pd.read_csv('data/Airport_Codes.csv')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Flights\n",
+ "- FL_DATE: convert string to a standard datetime object\n",
+ "- ORIGIN_CITY_NAME: split into city and state\n",
+ "- DEST_CITY_NAME: split into city and state\n",
+ "- AIR_TIME: \n",
+ " - Two; NAN; negative number; number in str\n",
+ "- DISTANCE: to float"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 281,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "routes['FL_DATE'] = pd.to_datetime(routes['FL_DATE'])\n",
+ "\n",
+ "# \n",
+ "routes['ORIGIN_STATE_NAME'] = routes['ORIGIN_CITY_NAME'].str.split(', ').str[1]\n",
+ "routes['ORIGIN_CITY_NAME'] = routes['ORIGIN_CITY_NAME'].str.split(', ').str[0]\n",
+ "routes['DEST_STATE_NAME'] = routes['DEST_CITY_NAME'].str.split(', ').str[1]\n",
+ "routes['DEST_CITY_NAME'] = routes['DEST_CITY_NAME'].str.split(', ').str[0]\n",
+ "\n",
+ "# air time column adjustments\n",
+ "routes['AIR_TIME'] = routes['AIR_TIME'].apply(lambda x: 2.0 if x == 'Two' else x)\n",
+ "routes['AIR_TIME'] = routes['AIR_TIME'].apply(lambda x: np.nan if x == 'NAN' or x == '$$$' else x)\n",
+ "routes['AIR_TIME'] = routes['AIR_TIME'].apply(lambda x: 121.0 if x == '121.0' else x)\n",
+ "routes['AIR_TIME'] = routes['AIR_TIME'].astype(float)\n",
+ "\n",
+ "# clean and convert distance to float\n",
+ "def distance_to_float(val):\n",
+ " try:\n",
+ " float_val = float(val)\n",
+ " if float_val < 0:\n",
+ " return -1 * float_val\n",
+ " return float_val\n",
+ " except:\n",
+ " return np.nan\n",
+ " \n",
+ "routes['DISTANCE'] = routes['DISTANCE'].apply(distance_to_float)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 282,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " FL_DATE | \n",
+ " OP_CARRIER | \n",
+ " TAIL_NUM | \n",
+ " OP_CARRIER_FL_NUM | \n",
+ " ORIGIN_AIRPORT_ID | \n",
+ " ORIGIN | \n",
+ " ORIGIN_CITY_NAME | \n",
+ " DEST_AIRPORT_ID | \n",
+ " DESTINATION | \n",
+ " DEST_CITY_NAME | \n",
+ " DEP_DELAY | \n",
+ " ARR_DELAY | \n",
+ " CANCELLED | \n",
+ " AIR_TIME | \n",
+ " DISTANCE | \n",
+ " OCCUPANCY_RATE | \n",
+ " ORIGIN_STATE_NAME | \n",
+ " DEST_STATE_NAME | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2019-03-02 | \n",
+ " WN | \n",
+ " N955WN | \n",
+ " 4591 | \n",
+ " 14635 | \n",
+ " RSW | \n",
+ " Fort Myers | \n",
+ " 11042 | \n",
+ " CLE | \n",
+ " Cleveland | \n",
+ " -8.0 | \n",
+ " -6.0 | \n",
+ " 0.0 | \n",
+ " 143.0 | \n",
+ " 1025.0 | \n",
+ " 0.970000 | \n",
+ " FL | \n",
+ " OH | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2019-03-02 | \n",
+ " WN | \n",
+ " N8686A | \n",
+ " 3231 | \n",
+ " 14635 | \n",
+ " RSW | \n",
+ " Fort Myers | \n",
+ " 11066 | \n",
+ " CMH | \n",
+ " Columbus | \n",
+ " 1.0 | \n",
+ " 5.0 | \n",
+ " 0.0 | \n",
+ " 135.0 | \n",
+ " 930.0 | \n",
+ " 0.550000 | \n",
+ " FL | \n",
+ " OH | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2019-03-02 | \n",
+ " WN | \n",
+ " N201LV | \n",
+ " 3383 | \n",
+ " 14635 | \n",
+ " RSW | \n",
+ " Fort Myers | \n",
+ " 11066 | \n",
+ " CMH | \n",
+ " Columbus | \n",
+ " 0.0 | \n",
+ " 4.0 | \n",
+ " 0.0 | \n",
+ " 132.0 | \n",
+ " 930.0 | \n",
+ " 0.910000 | \n",
+ " FL | \n",
+ " OH | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2019-03-02 | \n",
+ " WN | \n",
+ " N413WN | \n",
+ " 5498 | \n",
+ " 14635 | \n",
+ " RSW | \n",
+ " Fort Myers | \n",
+ " 11066 | \n",
+ " CMH | \n",
+ " Columbus | \n",
+ " 11.0 | \n",
+ " 14.0 | \n",
+ " 0.0 | \n",
+ " 136.0 | \n",
+ " 930.0 | \n",
+ " 0.670000 | \n",
+ " FL | \n",
+ " OH | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2019-03-02 | \n",
+ " WN | \n",
+ " N7832A | \n",
+ " 6933 | \n",
+ " 14635 | \n",
+ " RSW | \n",
+ " Fort Myers | \n",
+ " 11259 | \n",
+ " DAL | \n",
+ " Dallas | \n",
+ " 0.0 | \n",
+ " -17.0 | \n",
+ " 0.0 | \n",
+ " 151.0 | \n",
+ " 1005.0 | \n",
+ " 0.620000 | \n",
+ " FL | \n",
+ " TX | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1915881 | \n",
+ " 2019-03-23 | \n",
+ " AA | \n",
+ " N903NN | \n",
+ " 1433 | \n",
+ " 15370 | \n",
+ " TUL | \n",
+ " Tulsa | \n",
+ " 11057 | \n",
+ " CLT | \n",
+ " Charlotte | \n",
+ " -9.0 | \n",
+ " -6.0 | \n",
+ " 0.0 | \n",
+ " 112.0 | \n",
+ " NaN | \n",
+ " 0.794884 | \n",
+ " OK | \n",
+ " NC | \n",
+ "
\n",
+ " \n",
+ " 1915882 | \n",
+ " 2019-03-24 | \n",
+ " AA | \n",
+ " N965AN | \n",
+ " 1433 | \n",
+ " 15370 | \n",
+ " TUL | \n",
+ " Tulsa | \n",
+ " 11057 | \n",
+ " CLT | \n",
+ " Charlotte | \n",
+ " -2.0 | \n",
+ " -1.0 | \n",
+ " 0.0 | \n",
+ " 106.0 | \n",
+ " NaN | \n",
+ " 0.538399 | \n",
+ " OK | \n",
+ " NC | \n",
+ "
\n",
+ " \n",
+ " 1915883 | \n",
+ " 2019-03-25 | \n",
+ " AA | \n",
+ " N979NN | \n",
+ " 1433 | \n",
+ " 15370 | \n",
+ " TUL | \n",
+ " Tulsa | \n",
+ " 11057 | \n",
+ " CLT | \n",
+ " Charlotte | \n",
+ " -8.0 | \n",
+ " -25.0 | \n",
+ " 0.0 | \n",
+ " 106.0 | \n",
+ " NaN | \n",
+ " 0.955579 | \n",
+ " OK | \n",
+ " NC | \n",
+ "
\n",
+ " \n",
+ " 1915884 | \n",
+ " 2019-03-26 | \n",
+ " AA | \n",
+ " N872NN | \n",
+ " 1433 | \n",
+ " 15370 | \n",
+ " TUL | \n",
+ " Tulsa | \n",
+ " 11057 | \n",
+ " CLT | \n",
+ " Charlotte | \n",
+ " -9.0 | \n",
+ " -6.0 | \n",
+ " 0.0 | \n",
+ " 112.0 | \n",
+ " NaN | \n",
+ " 0.595344 | \n",
+ " OK | \n",
+ " NC | \n",
+ "
\n",
+ " \n",
+ " 1915885 | \n",
+ " 2019-03-27 | \n",
+ " AA | \n",
+ " N945AN | \n",
+ " 1433 | \n",
+ " 15370 | \n",
+ " TUL | \n",
+ " Tulsa | \n",
+ " 11057 | \n",
+ " CLT | \n",
+ " Charlotte | \n",
+ " -8.0 | \n",
+ " 5.0 | \n",
+ " 0.0 | \n",
+ " 117.0 | \n",
+ " NaN | \n",
+ " 0.350192 | \n",
+ " OK | \n",
+ " NC | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1915886 rows × 18 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " FL_DATE OP_CARRIER TAIL_NUM OP_CARRIER_FL_NUM ORIGIN_AIRPORT_ID \\\n",
+ "0 2019-03-02 WN N955WN 4591 14635 \n",
+ "1 2019-03-02 WN N8686A 3231 14635 \n",
+ "2 2019-03-02 WN N201LV 3383 14635 \n",
+ "3 2019-03-02 WN N413WN 5498 14635 \n",
+ "4 2019-03-02 WN N7832A 6933 14635 \n",
+ "... ... ... ... ... ... \n",
+ "1915881 2019-03-23 AA N903NN 1433 15370 \n",
+ "1915882 2019-03-24 AA N965AN 1433 15370 \n",
+ "1915883 2019-03-25 AA N979NN 1433 15370 \n",
+ "1915884 2019-03-26 AA N872NN 1433 15370 \n",
+ "1915885 2019-03-27 AA N945AN 1433 15370 \n",
+ "\n",
+ " ORIGIN ORIGIN_CITY_NAME DEST_AIRPORT_ID DESTINATION DEST_CITY_NAME \\\n",
+ "0 RSW Fort Myers 11042 CLE Cleveland \n",
+ "1 RSW Fort Myers 11066 CMH Columbus \n",
+ "2 RSW Fort Myers 11066 CMH Columbus \n",
+ "3 RSW Fort Myers 11066 CMH Columbus \n",
+ "4 RSW Fort Myers 11259 DAL Dallas \n",
+ "... ... ... ... ... ... \n",
+ "1915881 TUL Tulsa 11057 CLT Charlotte \n",
+ "1915882 TUL Tulsa 11057 CLT Charlotte \n",
+ "1915883 TUL Tulsa 11057 CLT Charlotte \n",
+ "1915884 TUL Tulsa 11057 CLT Charlotte \n",
+ "1915885 TUL Tulsa 11057 CLT Charlotte \n",
+ "\n",
+ " DEP_DELAY ARR_DELAY CANCELLED AIR_TIME DISTANCE OCCUPANCY_RATE \\\n",
+ "0 -8.0 -6.0 0.0 143.0 1025.0 0.970000 \n",
+ "1 1.0 5.0 0.0 135.0 930.0 0.550000 \n",
+ "2 0.0 4.0 0.0 132.0 930.0 0.910000 \n",
+ "3 11.0 14.0 0.0 136.0 930.0 0.670000 \n",
+ "4 0.0 -17.0 0.0 151.0 1005.0 0.620000 \n",
+ "... ... ... ... ... ... ... \n",
+ "1915881 -9.0 -6.0 0.0 112.0 NaN 0.794884 \n",
+ "1915882 -2.0 -1.0 0.0 106.0 NaN 0.538399 \n",
+ "1915883 -8.0 -25.0 0.0 106.0 NaN 0.955579 \n",
+ "1915884 -9.0 -6.0 0.0 112.0 NaN 0.595344 \n",
+ "1915885 -8.0 5.0 0.0 117.0 NaN 0.350192 \n",
+ "\n",
+ " ORIGIN_STATE_NAME DEST_STATE_NAME \n",
+ "0 FL OH \n",
+ "1 FL OH \n",
+ "2 FL OH \n",
+ "3 FL OH \n",
+ "4 FL TX \n",
+ "... ... ... \n",
+ "1915881 OK NC \n",
+ "1915882 OK NC \n",
+ "1915883 OK NC \n",
+ "1915884 OK NC \n",
+ "1915885 OK NC \n",
+ "\n",
+ "[1915886 rows x 18 columns]"
+ ]
+ },
+ "execution_count": 282,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "routes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Tickets\n",
+ "- YEAR to int\n",
+ "- clean itin_fare"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 283,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# year column to int year\n",
+ "ticket_price['YEAR'] = ticket_price['YEAR'].astype(int)\n",
+ "\n",
+ "# clean and convert price to float\n",
+ "def find_number(text):\n",
+ " if type(text) != str:\n",
+ " return np.nan\n",
+ " re_result = re.search(r'[\\d\\.]+', text)\n",
+ " if re_result is not None:\n",
+ " return float(re_result.group(0))\n",
+ " return np.nan\n",
+ "\n",
+ "ticket_price['ITIN_FARE'] = ticket_price['ITIN_FARE'].apply(find_number)\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 284,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ITIN_ID | \n",
+ " YEAR | \n",
+ " QUARTER | \n",
+ " ORIGIN | \n",
+ " ORIGIN_COUNTRY | \n",
+ " ORIGIN_STATE_ABR | \n",
+ " ORIGIN_STATE_NM | \n",
+ " ROUNDTRIP | \n",
+ " REPORTING_CARRIER | \n",
+ " PASSENGERS | \n",
+ " ITIN_FARE | \n",
+ " DESTINATION | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 201912723049 | \n",
+ " 2019 | \n",
+ " 1 | \n",
+ " ABI | \n",
+ " US | \n",
+ " TX | \n",
+ " Texas | \n",
+ " 1.0 | \n",
+ " MQ | \n",
+ " 1.0 | \n",
+ " 736.0 | \n",
+ " DAB | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 201912723085 | \n",
+ " 2019 | \n",
+ " 1 | \n",
+ " ABI | \n",
+ " US | \n",
+ " TX | \n",
+ " Texas | \n",
+ " 1.0 | \n",
+ " MQ | \n",
+ " 1.0 | \n",
+ " 570.0 | \n",
+ " COS | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 201912723491 | \n",
+ " 2019 | \n",
+ " 1 | \n",
+ " ABI | \n",
+ " US | \n",
+ " TX | \n",
+ " Texas | \n",
+ " 1.0 | \n",
+ " MQ | \n",
+ " 1.0 | \n",
+ " 564.0 | \n",
+ " MCO | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 201912723428 | \n",
+ " 2019 | \n",
+ " 1 | \n",
+ " ABI | \n",
+ " US | \n",
+ " TX | \n",
+ " Texas | \n",
+ " 1.0 | \n",
+ " MQ | \n",
+ " 1.0 | \n",
+ " 345.0 | \n",
+ " LGA | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 201912723509 | \n",
+ " 2019 | \n",
+ " 1 | \n",
+ " ABI | \n",
+ " US | \n",
+ " TX | \n",
+ " Texas | \n",
+ " 0.0 | \n",
+ " MQ | \n",
+ " 1.0 | \n",
+ " 309.0 | \n",
+ " MGM | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1167280 | \n",
+ " 201911284909 | \n",
+ " 2019 | \n",
+ " 1 | \n",
+ " YAK | \n",
+ " US | \n",
+ " AK | \n",
+ " Alaska | \n",
+ " 0.0 | \n",
+ " AS | \n",
+ " 1.0 | \n",
+ " 244.0 | \n",
+ " ANC | \n",
+ "
\n",
+ " \n",
+ " 1167281 | \n",
+ " 201911284959 | \n",
+ " 2019 | \n",
+ " 1 | \n",
+ " YAK | \n",
+ " US | \n",
+ " AK | \n",
+ " Alaska | \n",
+ " 1.0 | \n",
+ " AS | \n",
+ " 1.0 | \n",
+ " 371.0 | \n",
+ " JNU | \n",
+ "
\n",
+ " \n",
+ " 1167282 | \n",
+ " 201911284940 | \n",
+ " 2019 | \n",
+ " 1 | \n",
+ " YAK | \n",
+ " US | \n",
+ " AK | \n",
+ " Alaska | \n",
+ " 0.0 | \n",
+ " AS | \n",
+ " 1.0 | \n",
+ " 271.0 | \n",
+ " JNU | \n",
+ "
\n",
+ " \n",
+ " 1167283 | \n",
+ " 201911284914 | \n",
+ " 2019 | \n",
+ " 1 | \n",
+ " YAK | \n",
+ " US | \n",
+ " AK | \n",
+ " Alaska | \n",
+ " 0.0 | \n",
+ " AS | \n",
+ " 1.0 | \n",
+ " 603.0 | \n",
+ " ANC | \n",
+ "
\n",
+ " \n",
+ " 1167284 | \n",
+ " 201911284952 | \n",
+ " 2019 | \n",
+ " 1 | \n",
+ " YAK | \n",
+ " US | \n",
+ " AK | \n",
+ " Alaska | \n",
+ " 1.0 | \n",
+ " AS | \n",
+ " 1.0 | \n",
+ " 299.0 | \n",
+ " JNU | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1167285 rows × 12 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ITIN_ID YEAR QUARTER ORIGIN ORIGIN_COUNTRY ORIGIN_STATE_ABR \\\n",
+ "0 201912723049 2019 1 ABI US TX \n",
+ "1 201912723085 2019 1 ABI US TX \n",
+ "2 201912723491 2019 1 ABI US TX \n",
+ "3 201912723428 2019 1 ABI US TX \n",
+ "4 201912723509 2019 1 ABI US TX \n",
+ "... ... ... ... ... ... ... \n",
+ "1167280 201911284909 2019 1 YAK US AK \n",
+ "1167281 201911284959 2019 1 YAK US AK \n",
+ "1167282 201911284940 2019 1 YAK US AK \n",
+ "1167283 201911284914 2019 1 YAK US AK \n",
+ "1167284 201911284952 2019 1 YAK US AK \n",
+ "\n",
+ " ORIGIN_STATE_NM ROUNDTRIP REPORTING_CARRIER PASSENGERS ITIN_FARE \\\n",
+ "0 Texas 1.0 MQ 1.0 736.0 \n",
+ "1 Texas 1.0 MQ 1.0 570.0 \n",
+ "2 Texas 1.0 MQ 1.0 564.0 \n",
+ "3 Texas 1.0 MQ 1.0 345.0 \n",
+ "4 Texas 0.0 MQ 1.0 309.0 \n",
+ "... ... ... ... ... ... \n",
+ "1167280 Alaska 0.0 AS 1.0 244.0 \n",
+ "1167281 Alaska 1.0 AS 1.0 371.0 \n",
+ "1167282 Alaska 0.0 AS 1.0 271.0 \n",
+ "1167283 Alaska 0.0 AS 1.0 603.0 \n",
+ "1167284 Alaska 1.0 AS 1.0 299.0 \n",
+ "\n",
+ " DESTINATION \n",
+ "0 DAB \n",
+ "1 COS \n",
+ "2 MCO \n",
+ "3 LGA \n",
+ "4 MGM \n",
+ "... ... \n",
+ "1167280 ANC \n",
+ "1167281 JNU \n",
+ "1167282 JNU \n",
+ "1167283 ANC \n",
+ "1167284 JNU \n",
+ "\n",
+ "[1167285 rows x 12 columns]"
+ ]
+ },
+ "execution_count": 284,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ticket_price"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## AirportsInfo\n",
+ "- COORDINATES: split into separate atomic columns\n",
+ " - first one is longitude\n",
+ " - second one is latitude"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 285,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# clean coordinates\n",
+ "\n",
+ "airportsInfo['COORDINATES_LONGITUDE'] = airportsInfo['COORDINATES'].apply(lambda x: x.split(', ')[0]).astype(float)\n",
+ "airportsInfo['COORDINATES_LATITUDE'] = airportsInfo['COORDINATES'].apply(lambda x: x.split(', ')[1]).astype(float)\n",
+ "airportsInfo.drop(columns=['COORDINATES'], inplace=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 286,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 55369 entries, 0 to 55368\n",
+ "Data columns (total 9 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 TYPE 55369 non-null object \n",
+ " 1 NAME 55369 non-null object \n",
+ " 2 ELEVATION_FT 48354 non-null float64\n",
+ " 3 CONTINENT 27526 non-null object \n",
+ " 4 ISO_COUNTRY 55122 non-null object \n",
+ " 5 MUNICIPALITY 49663 non-null object \n",
+ " 6 IATA_CODE 9182 non-null object \n",
+ " 7 COORDINATES_LONGITUDE 55369 non-null float64\n",
+ " 8 COORDINATES_LATITUDE 55369 non-null float64\n",
+ "dtypes: float64(3), object(6)\n",
+ "memory usage: 3.8+ MB\n"
+ ]
+ }
+ ],
+ "source": [
+ "airportsInfo.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 287,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " TYPE | \n",
+ " NAME | \n",
+ " ELEVATION_FT | \n",
+ " CONTINENT | \n",
+ " ISO_COUNTRY | \n",
+ " MUNICIPALITY | \n",
+ " IATA_CODE | \n",
+ " COORDINATES_LONGITUDE | \n",
+ " COORDINATES_LATITUDE | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " heliport | \n",
+ " Total Rf Heliport | \n",
+ " 11.0 | \n",
+ " NaN | \n",
+ " US | \n",
+ " Bensalem | \n",
+ " NaN | \n",
+ " -74.933601 | \n",
+ " 40.070801 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " small_airport | \n",
+ " Aero B Ranch Airport | \n",
+ " 3435.0 | \n",
+ " NaN | \n",
+ " US | \n",
+ " Leoti | \n",
+ " NaN | \n",
+ " -101.473911 | \n",
+ " 38.704022 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " small_airport | \n",
+ " Lowell Field | \n",
+ " 450.0 | \n",
+ " NaN | \n",
+ " US | \n",
+ " Anchor Point | \n",
+ " NaN | \n",
+ " -151.695999 | \n",
+ " 59.949200 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " small_airport | \n",
+ " Epps Airpark | \n",
+ " 820.0 | \n",
+ " NaN | \n",
+ " US | \n",
+ " Harvest | \n",
+ " NaN | \n",
+ " -86.770302 | \n",
+ " 34.864799 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " closed | \n",
+ " Newport Hospital & Clinic Heliport | \n",
+ " 237.0 | \n",
+ " NaN | \n",
+ " US | \n",
+ " Newport | \n",
+ " NaN | \n",
+ " -91.254898 | \n",
+ " 35.608700 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 55364 | \n",
+ " medium_airport | \n",
+ " Yingkou Lanqi Airport | \n",
+ " 0.0 | \n",
+ " AS | \n",
+ " CN | \n",
+ " Yingkou | \n",
+ " YKH | \n",
+ " 122.358600 | \n",
+ " 40.542524 | \n",
+ "
\n",
+ " \n",
+ " 55365 | \n",
+ " medium_airport | \n",
+ " Shenyang Dongta Airport | \n",
+ " NaN | \n",
+ " AS | \n",
+ " CN | \n",
+ " Shenyang | \n",
+ " NaN | \n",
+ " 123.496002 | \n",
+ " 41.784401 | \n",
+ "
\n",
+ " \n",
+ " 55366 | \n",
+ " heliport | \n",
+ " Sealand Helipad | \n",
+ " 40.0 | \n",
+ " EU | \n",
+ " GB | \n",
+ " Sealand | \n",
+ " NaN | \n",
+ " 1.482500 | \n",
+ " 51.894444 | \n",
+ "
\n",
+ " \n",
+ " 55367 | \n",
+ " small_airport | \n",
+ " Glorioso Islands Airstrip | \n",
+ " 11.0 | \n",
+ " AF | \n",
+ " TF | \n",
+ " Grande Glorieuse | \n",
+ " NaN | \n",
+ " 47.296389 | \n",
+ " -11.584278 | \n",
+ "
\n",
+ " \n",
+ " 55368 | \n",
+ " small_airport | \n",
+ " Satsuma IÅjima Airport | \n",
+ " 338.0 | \n",
+ " AS | \n",
+ " JP | \n",
+ " Mishima-Mura | \n",
+ " NaN | \n",
+ " 130.270556 | \n",
+ " 30.784722 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
55369 rows × 9 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " TYPE NAME ELEVATION_FT \\\n",
+ "0 heliport Total Rf Heliport 11.0 \n",
+ "1 small_airport Aero B Ranch Airport 3435.0 \n",
+ "2 small_airport Lowell Field 450.0 \n",
+ "3 small_airport Epps Airpark 820.0 \n",
+ "4 closed Newport Hospital & Clinic Heliport 237.0 \n",
+ "... ... ... ... \n",
+ "55364 medium_airport Yingkou Lanqi Airport 0.0 \n",
+ "55365 medium_airport Shenyang Dongta Airport NaN \n",
+ "55366 heliport Sealand Helipad 40.0 \n",
+ "55367 small_airport Glorioso Islands Airstrip 11.0 \n",
+ "55368 small_airport Satsuma IÅjima Airport 338.0 \n",
+ "\n",
+ " CONTINENT ISO_COUNTRY MUNICIPALITY IATA_CODE \\\n",
+ "0 NaN US Bensalem NaN \n",
+ "1 NaN US Leoti NaN \n",
+ "2 NaN US Anchor Point NaN \n",
+ "3 NaN US Harvest NaN \n",
+ "4 NaN US Newport NaN \n",
+ "... ... ... ... ... \n",
+ "55364 AS CN Yingkou YKH \n",
+ "55365 AS CN Shenyang NaN \n",
+ "55366 EU GB Sealand NaN \n",
+ "55367 AF TF Grande Glorieuse NaN \n",
+ "55368 AS JP Mishima-Mura NaN \n",
+ "\n",
+ " COORDINATES_LONGITUDE COORDINATES_LATITUDE \n",
+ "0 -74.933601 40.070801 \n",
+ "1 -101.473911 38.704022 \n",
+ "2 -151.695999 59.949200 \n",
+ "3 -86.770302 34.864799 \n",
+ "4 -91.254898 35.608700 \n",
+ "... ... ... \n",
+ "55364 122.358600 40.542524 \n",
+ "55365 123.496002 41.784401 \n",
+ "55366 1.482500 51.894444 \n",
+ "55367 47.296389 -11.584278 \n",
+ "55368 130.270556 30.784722 \n",
+ "\n",
+ "[55369 rows x 9 columns]"
+ ]
+ },
+ "execution_count": 287,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "airportsInfo"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "tongConsultinInc",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.19"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/data_preprocessing.py b/data_preprocessing.py
new file mode 100644
index 0000000..6baba43
--- /dev/null
+++ b/data_preprocessing.py
@@ -0,0 +1,61 @@
+import pandas as pd
+import numpy as np
+import re
+
+
+def str_to_float(text):
+ """
+ convert a string to a float. if the input is Two, convert it to 2.0
+ ---
+ text: str to be converted
+ ---
+ return: converted float or np.nan if conversion fails
+ >>> str_to_float("1.0")
+ 1.0
+ >>> str_to_float("1")
+ 1.0
+ >>> str_to_float("1.0.0")
+ nan
+ >>> str_to_float("Two")
+ 2.0
+ >>> str_to_float("-1.0")
+ 1.0
+ >>> str_to_float("1.0-")
+ nan
+ >>> str_to_float("20000.00")
+ 20000.0
+ """
+ try:
+ return abs(float(text))
+ except:
+ if text == "Two":
+ return 2.0
+ return np.nan
+
+
+def main():
+ # loading data
+ flights = pd.read_csv("data/original_data/Flights.csv")
+ tickets = pd.read_csv("data/original_data/Tickets.csv")
+ airport_codes = pd.read_csv("data/original_data/Airport_Codes.csv")
+
+ # cleaning flights data
+ flights["FL_DATE"] = pd.to_datetime(flights["FL_DATE"])
+
+ # finding the specific city and state name
+ split_ORIGIN_CITY_STATE = flights["ORIGIN_CITY_NAME"].str.split(", ")
+ split_DEST_CITY_STATE = flights["DEST_CITY_NAME"].str.split(", ")
+
+ flights["ORIGIN_CITY_NAME"] = split_ORIGIN_CITY_STATE.str[0]
+ flights["ORIGIN_STATE_NAME"] = split_ORIGIN_CITY_STATE.str[1]
+
+ flights["DEST_STATE_NAME"] = split_DEST_CITY_STATE.str[1]
+ flights["DEST_CITY_NAME"] = split_DEST_CITY_STATE.str[0]
+
+ flights["DISTANCE"] = flights["DISTANCE"].apply(str_to_float)
+
+ flights.to_csv("data/cleaned_data/flights.csv", index=False)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..7f07acf
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,161 @@
+anyio @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_a17a7759g2/croot/anyio_1706220182417/work
+appnope @ file:///Users/ktietz/demo/mc3/conda-bld/appnope_1629146036738/work
+argon2-cffi @ file:///opt/conda/conda-bld/argon2-cffi_1645000214183/work
+argon2-cffi-bindings @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/croot-wbf5edig/argon2-cffi-bindings_1644845754377/work
+asttokens @ file:///opt/conda/conda-bld/asttokens_1646925590279/work
+async-lru @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_02efro5ps8/croot/async-lru_1699554529181/work
+attrs @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_224434dqzl/croot/attrs_1695717839274/work
+Babel @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_00k1rl2pus/croot/babel_1671781944131/work
+backcall @ file:///home/ktietz/src/ci/backcall_1611930011877/work
+beautifulsoup4 @ file:///tmp/build/80754af9/beautifulsoup4_1631874778482/work
+bleach @ file:///opt/conda/conda-bld/bleach_1641577558959/work
+brotlipy==0.7.0
+cachetools==5.3.3
+certifi @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_3bzbkiv4h_/croot/certifi_1707229182618/work/certifi
+cffi @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_7a9c7wyorr/croot/cffi_1714483157752/work
+charset-normalizer @ file:///tmp/build/80754af9/charset-normalizer_1630003229654/work
+click==8.1.7
+comm @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_3doui0bmzb/croot/comm_1709322861485/work
+cryptography @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_31zgxm62w8/croot/cryptography_1714660690857/work
+cycler @ file:///tmp/build/80754af9/cycler_1637851556182/work
+debugpy @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_563_nwtkoc/croot/debugpy_1690905063850/work
+decorator @ file:///opt/conda/conda-bld/decorator_1643638310831/work
+defusedxml @ file:///tmp/build/80754af9/defusedxml_1615228127516/work
+dill==0.3.8
+docopt @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_5alx0ctp1q/croots/recipe/docopt_1663662430075/work
+entrypoints @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_croot-jb01gaox/entrypoints_1650293758411/work
+exceptiongroup @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_b2258scr33/croot/exceptiongroup_1706031391815/work
+execnet @ file:///tmp/build/80754af9/execnet_1623921183358/work
+executing @ file:///opt/conda/conda-bld/executing_1646925071911/work
+fastjsonschema @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_43a0jaiddu/croots/recipe/python-fastjsonschema_1661368628129/work
+fonttools @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_60c8ux4mkl/croot/fonttools_1713551354374/work
+google-api-core==2.19.0
+google-api-python-client==2.129.0
+google-auth==2.29.0
+google-auth-httplib2==0.2.0
+google-auth-oauthlib==1.2.0
+googleapis-common-protos==1.63.0
+greenlet @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_516imz09pb/croot/greenlet_1702059966336/work
+gspread==6.1.2
+httplib2==0.22.0
+idna @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_a12xpo84t2/croot/idna_1714398852854/work
+importlib-metadata @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_5498c88e7n/croot/importlib_metadata-suite_1704813534254/work
+importlib-resources @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_77lzrsh8mp/croot/importlib_resources-suite_1704281852961/work
+iniconfig @ file:///home/linux1/recipes/ci/iniconfig_1610983019677/work
+ipykernel @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_f428_5tjvx/croot/ipykernel_1705933835534/work
+ipysheet==0.7.0
+ipython @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_84r7osg3nm/croot/ipython_1691532095330/work
+ipython-genutils @ file:///tmp/build/80754af9/ipython_genutils_1606773439826/work
+ipywidgets==8.1.3
+jedi @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/croot-f1t6hma6/jedi_1644315882177/work
+Jinja2 @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_7dognxkzoy/croot/jinja2_1706733627811/work
+joblib @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_f75pzkv6n1/croot/joblib_1713976769255/work
+json5 @ file:///tmp/build/80754af9/json5_1624432770122/work
+jsonschema @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_27o3go8sqa/croot/jsonschema_1699041627313/work
+jsonschema-specifications @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_d38pclgu95/croot/jsonschema-specifications_1699032390832/work
+jupyter-events @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_14ldd9s4d0/croot/jupyter_events_1699282481406/work
+jupyter-lsp @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_ae9br5v37x/croot/jupyter-lsp-meta_1699978259353/work
+jupyter_client @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_aen57n2aow/croot/jupyter_client_1676329104065/work
+jupyter_core @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_782yoyc_98/croot/jupyter_core_1698937318631/work
+jupyter_server @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_281pz9vly5/croot/jupyter_server_1699466465530/work
+jupyter_server_terminals @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_e7ryd60iuw/croot/jupyter_server_terminals_1686870731283/work
+jupyterlab @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_1694728214446/work
+jupyterlab-pygments @ file:///tmp/build/80754af9/jupyterlab_pygments_1601490720602/work
+jupyterlab_server @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_315a64u22w/croot/jupyterlab_server_1699555438434/work
+jupyterlab_widgets==3.0.11
+kiwisolver @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_93o8te804v/croot/kiwisolver_1672387163224/work
+lxml @ file:///Users/runner/miniforge3/conda-bld/lxml_1704590488726/work
+markdown-it-py==3.0.0
+MarkupSafe @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_a84ni4pci8/croot/markupsafe_1704206002077/work
+matplotlib @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_croot-_3usbnl1/matplotlib-suite_1647506475477/work
+matplotlib-inline @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_f6fdc0hldi/croots/recipe/matplotlib-inline_1662014472341/work
+mdurl==0.1.2
+mistune @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_17ya6k1sbs/croots/recipe/mistune_1661496228719/work
+mypy-extensions==0.4.3
+nbclient @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_626hpwnurm/croot/nbclient_1698934218848/work
+nbconvert @ file:///home/conda/feedstock_root/build_artifacts/nbconvert-meta_1693331710275/work
+nbformat @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_cbnf5nccgk/croot/nbformat_1694616744196/work
+nest-asyncio @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_310vb5e2a0/croot/nest-asyncio_1708532678212/work
+notebook @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_dfmids47bo/croots/recipe/notebook_1659083663569/work
+notebook_shim @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_d6_ze10f45/croot/notebook-shim_1699455897525/work
+numpy @ file:///Users/ktietz/ci_310/numpy_and_numpy_base_1644255524335/work
+oauthlib==3.2.2
+olefile @ file:///Users/ktietz/demo/mc3/conda-bld/olefile_1629805411829/work
+otter-grader==3.1.4
+overrides @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_70s80guh9g/croot/overrides_1699371144462/work
+packaging @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_a6lqg7at4g/croot/packaging_1710807410750/work
+pandas==1.3.3
+pandas-tutor==2.0.3
+pandocfilters @ file:///opt/conda/conda-bld/pandocfilters_1643405455980/work
+parso @ file:///opt/conda/conda-bld/parso_1641458642106/work
+patsy==0.5.3
+pdfkit==1.0.0
+pexpect @ file:///tmp/build/80754af9/pexpect_1605563209008/work
+pickleshare @ file:///tmp/build/80754af9/pickleshare_1606932040724/work
+Pillow @ file:///Users/runner/miniforge3/conda-bld/pillow_1630696687447/work
+pkgutil_resolve_name @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_98lcqyhajf/croot/pkgutil-resolve-name_1704297463060/work
+platformdirs @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_a8u4fy8k9o/croot/platformdirs_1692205661656/work
+plotly @ file:///home/conda/feedstock_root/build_artifacts/plotly_1694802097009/work
+pluggy @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_croot-w6jyveby/pluggy_1648109277227/work
+pooch @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_1bocfs80f4/croot/pooch_1695850117888/work
+prometheus-client @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_25sgeyk0j5/croots/recipe/prometheus_client_1659455103277/work
+prompt-toolkit @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_c63v4kqjzr/croot/prompt-toolkit_1704404354115/work
+proto-plus==1.23.0
+protobuf==4.25.3
+psutil @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_1310b568-21f4-4cb0-b0e3-2f3d31e39728k9coaga5/croots/recipe/psutil_1656431280844/work
+ptyprocess @ file:///tmp/build/80754af9/ptyprocess_1609355006118/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl
+pure-eval @ file:///opt/conda/conda-bld/pure_eval_1646925070566/work
+pyasn1==0.6.0
+pyasn1_modules==0.4.0
+pycparser @ file:///tmp/build/80754af9/pycparser_1636541352034/work
+pydantic==1.10.2
+Pygments @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_29bs9f_dh9/croot/pygments_1684279974747/work
+pyOpenSSL @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_94bn0tgaw5/croot/pyopenssl_1708381744097/work
+pyparsing @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_3b_3vxnd07/croots/recipe/pyparsing_1661452540919/work
+PyPDF2==3.0.1
+PySocks @ file:///Users/ktietz/Code/oss/ci_pkgs/pysocks_1626781349491/work
+pytest @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_75ehl8i878/croot/pytest_1690474711033/work
+pytest-xdist @ file:///home/conda/feedstock_root/build_artifacts/pytest-xdist_1684499835847/work
+python-dateutil @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_6e_fc8xema/croot/python-dateutil_1715108793034/work
+python-json-logger @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_c3baq2ko4j/croot/python-json-logger_1683823815343/work
+python-on-whales==0.71.0
+pytz @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_a4b76c83ik/croot/pytz_1713974318928/work
+PyYAML @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_a8_sdgulmz/croot/pyyaml_1698096054705/work
+pyzmq @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_cbtlm0pib_/croot/pyzmq_1709318330127/work
+referencing @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_5cz64gsx70/croot/referencing_1699012046031/work
+requests @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_b3tnputioh/croot/requests_1707355573919/work
+requests-oauthlib==2.0.0
+rfc3339-validator @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_76ae5cu30h/croot/rfc3339-validator_1683077051957/work
+rfc3986-validator @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_d0l5zd97kt/croot/rfc3986-validator_1683058998431/work
+rich==13.7.1
+rpds-py @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_f8jkozoefm/croot/rpds-py_1698945944860/work
+rsa==4.9
+scikit-learn @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/croot-ic3nfbzi/scikit-learn_1642621481325/work
+scipy==1.10.1
+seaborn @ file:///tmp/build/80754af9/seaborn_1629307859561/work
+Send2Trash @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_5b31f0zzlv/croot/send2trash_1699371144121/work
+shellingham==1.5.4
+six @ file:///tmp/build/80754af9/six_1644875935023/work
+sniffio @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_1573pknjrg/croot/sniffio_1705431298885/work
+soupsieve @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_9798xzs_03/croot/soupsieve_1696347567192/work
+SQLAlchemy @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_da4pw1i5_7/croot/sqlalchemy_1695720907140/work
+stack-data @ file:///opt/conda/conda-bld/stack_data_1646927590127/work
+statsmodels @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_94gox2humz/croot/statsmodels_1676644453811/work
+tenacity @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_0ew5sfng29/croot/tenacity_1682972282256/work
+terminado @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_fcfvyc0an2/croot/terminado_1671751835701/work
+threadpoolctl @ file:///Users/ktietz/demo/mc3/conda-bld/threadpoolctl_1629802263681/work
+tinycss2 @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_fcw5_i306t/croot/tinycss2_1668168825117/work
+tomli @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_d0e5ffbf-5cf1-45be-8693-c5dff8108a2awhthtjlq/croots/recipe/tomli_1657175508477/work
+tornado @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_3a5nrn2jeh/croot/tornado_1696936974091/work
+tqdm @ file:///tmp/build/80754af9/tqdm_1635330843403/work
+traitlets @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_6301rd5qbe/croot/traitlets_1671143894285/work
+typer==0.12.3
+typing_extensions==4.1.1
+unicodedata2 @ file:///private/var/folders/k1/30mswbxs7r1g6zwn8y4fyt500000gp/T/abs_a3epjto7gs/croot/unicodedata2_1713212955584/work
+uritemplate==4.1.1
+urllib3==1.26.7
+wcwidth @ file:///Users/ktietz/demo/mc3/conda-bld/wcwidth_1629357192024/work
+webencodings==0.5.1
+websocket-client @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_d37u7gqts8/croot/websocket-client_1715878310260/work
+widgetsnbextension==4.0.11
+zipp @ file:///private/var/folders/nz/j6p8yfhx1mv_0grj5xl4650h0000gp/T/abs_31jm3q76eq/croot/zipp_1704206913245/work