Skip to content

Commit

Permalink
Fixed bugs and updated Streamlit app
Browse files Browse the repository at this point in the history
  • Loading branch information
carlosfab committed Oct 4, 2023
1 parent b1c0cb6 commit 783f91c
Show file tree
Hide file tree
Showing 19 changed files with 371 additions and 45 deletions.
44 changes: 22 additions & 22 deletions notebooks/12_simulated_feature_pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@
" \"\"\"\n",
" \n",
" # Calculate equivalent date range from a year ago\n",
" from_date_ = from_date - timedelta(weeks=52)\n",
" to_date_ = to_date - timedelta(weeks=52)\n",
" from_date_ = from_date - timedelta(days=7*52)\n",
" to_date_ = to_date - timedelta(days=7*52)\n",
"\n",
" print(f\"Fetching raw data from {from_date_} to {to_date_}\")\n",
"\n",
Expand All @@ -84,7 +84,7 @@
" rides = pd.concat([rides, rides_2])\n",
"\n",
" # Shift the data by 52 weeks to make it look recent\n",
" rides[\"pickup_datetime\"] += timedelta(weeks=52)\n",
" rides[\"pickup_datetime\"] += timedelta(days=7*52)\n",
" \n",
" # Sort the dataframe by location and datetime\n",
" rides.sort_values(by=[\"pickup_location_id\", \"pickup_datetime\"], inplace=True)\n",
Expand All @@ -101,7 +101,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Fetching raw data from 2022-09-03 11:00:00 to 2022-10-01 11:00:00\n",
"Fetching raw data from 2022-09-07 02:00:00 to 2022-10-05 02:00:00\n",
"File 2022-09 was already in local storage\n",
"File 2022-10 was already in local storage\n"
]
Expand All @@ -125,7 +125,7 @@
"See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information. (Deprecated NumPy 1.25)\n",
"DeprecationWarning: np.find_common_type is deprecated. Please use `np.result_type` or `np.promote_types`.\n",
"See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information. (Deprecated NumPy 1.25)\n",
"100%|██████████| 265/265 [00:00<00:00, 272.82it/s]\n"
"100%|██████████| 265/265 [00:01<00:00, 254.10it/s]\n"
]
},
{
Expand Down Expand Up @@ -157,32 +157,32 @@
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2023-09-02 11:00:00</td>\n",
" <td>1</td>\n",
" <td>2023-09-06 02:00:00</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2023-09-02 12:00:00</td>\n",
" <td>1</td>\n",
" <td>2023-09-06 03:00:00</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2023-09-02 13:00:00</td>\n",
" <td>1</td>\n",
" <td>2023-09-06 04:00:00</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2023-09-02 14:00:00</td>\n",
" <td>3</td>\n",
" <td>2023-09-06 05:00:00</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2023-09-02 15:00:00</td>\n",
" <td>6</td>\n",
" <td>2023-09-06 06:00:00</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
Expand All @@ -191,11 +191,11 @@
],
"text/plain": [
" pickup_hour rides pickup_location_id\n",
"0 2023-09-02 11:00:00 1 1\n",
"1 2023-09-02 12:00:00 1 1\n",
"2 2023-09-02 13:00:00 1 1\n",
"3 2023-09-02 14:00:00 3 1\n",
"4 2023-09-02 15:00:00 6 1"
"0 2023-09-06 02:00:00 0 1\n",
"1 2023-09-06 03:00:00 0 1\n",
"2 2023-09-06 04:00:00 0 1\n",
"3 2023-09-06 05:00:00 0 1\n",
"4 2023-09-06 06:00:00 3 1"
]
},
"execution_count": 5,
Expand Down Expand Up @@ -253,7 +253,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "88ad1c2efb6d439aa09b2ace324b9b04",
"model_id": "b50ca4ac755e467b8846d43ede1f7e60",
"version_major": 2,
"version_minor": 0
},
Expand All @@ -276,7 +276,7 @@
{
"data": {
"text/plain": [
"(<hsfs.core.job.Job at 0x14ca6ec20>, None)"
"(<hsfs.core.job.Job at 0x143a28a00>, None)"
]
},
"execution_count": 7,
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ pydeck = "^0.8.0"
geopandas = "^0.12.2"
discordwebhook = "^1.0.3"
fire = "^0.5.0"
schema = "^0.7.5"

[tool.poetry.dev-dependencies]
pytest = "^5.2"
Expand Down
Binary file added src/.DS_Store
Binary file not shown.
1 change: 1 addition & 0 deletions src/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__version__ = '0.1.0'
Binary file added src/__pycache__/__init__.cpython-310.pyc
Binary file not shown.
Binary file added src/__pycache__/app_component.cpython-310.pyc
Binary file not shown.
Binary file added src/__pycache__/config.cpython-310.pyc
Binary file not shown.
Binary file added src/__pycache__/data.cpython-310.pyc
Binary file not shown.
Binary file added src/__pycache__/data_split.cpython-310.pyc
Binary file not shown.
Binary file added src/__pycache__/feature_store_api.cpython-310.pyc
Binary file not shown.
Binary file added src/__pycache__/inference.cpython-310.pyc
Binary file not shown.
Binary file added src/__pycache__/model.cpython-310.pyc
Binary file not shown.
Binary file added src/__pycache__/paths.cpython-310.pyc
Binary file not shown.
Binary file added src/__pycache__/plot.cpython-310.pyc
Binary file not shown.
41 changes: 31 additions & 10 deletions src/app.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Imports
from datetime import datetime
import pandas as pd
import streamlit as st
import app_component as ac
from src.inference import load_batch_of_features_from_store

# Page configuration
st.set_page_config(
Expand All @@ -14,21 +17,48 @@
unsafe_allow_html=True
)


# Header Section
home_title = "Taxi Demand Prediction"
st.markdown(f"""# {home_title} <span style=color:#2E9BF5><font size=5>Web App</font></span>""", unsafe_allow_html=True)

st.markdown("""\n""")

loading_info = st.empty()

st.markdown("#### Greetings 🚖")
st.write(
"""
Welcome to the Taxi Demand Predictor Hub, where cutting-edge Machine Learning meets urban mobility needs. This web application is an integral component of an end-to-end Machine Learning Project. For those interested in the technical aspects, the project repository offers comprehensive insights, including explanatory notebooks, source code, and automation utilities.
Welcome to the Taxi Demand Predictor Hub, where cutting-edge Machine Learning meets urban mobility needs. For those interested in the technical aspects, the project repository offers comprehensive insights.
"""
)

# App Component
ac.robo_avatar_component()

# Sidebar
progress_bar = st.sidebar.header(":gear: Project Progress")
progress_bar = st.sidebar.progress(0)

# constant for number of steps in progress bar
N_STEPS = 7

# STEP 1 - load shape data for NYC taxi zones
with loading_info, st.spinner("Downloading data... this may take a while! \n Don't worry, this is a one-time thing. :wink:"):
shape_data = ac.load_shape_data_file()
st.sidebar.write(":white_check_mark: Shape data download complete!")
progress_bar.progress(1/N_STEPS)

# STEP 2 - Fetch batch of inference data
with loading_info, st.spinner("Fetching data from Feature Store..."):
current_date = pd.to_datetime(datetime.utcnow()).floor('H')
features = load_batch_of_features_from_store(current_date)
st.sidebar.write(":white_check_mark: Inference data fetched!")
progress_bar.progress(2/N_STEPS)

ac.render_contact()


# Real-world Machine Learning Section
st.markdown("\n")
st.markdown("#### Real-World Machine Learning 🛠")
Expand All @@ -49,12 +79,3 @@

# Repository Link Button
st.link_button(":star: Star the Repository!", "https://github.com/carlosfab/taxi_demand_predictor", type='secondary', use_container_width=True)

# Sidebar
st.sidebar.image("header.gif", use_column_width=True)
st.sidebar.title("Contact")
st.sidebar.info(
"""
Carlos at [sigmoidal.ai](https://sigmoidal.ai/en) | [GitHub](https://github.com/carlosfab) | [Twitter](https://twitter.com/carlos_melo_py) | [YouTube](https://www.youtube.com/@CarlosMeloSigmoidal) | [Instagram](http://instagram.com/carlos_melo.py) | [LinkedIn](http://linkedin.com/in/carlos-melo-data-science/)
"""
)
43 changes: 42 additions & 1 deletion src/app_component.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
# Standard Libraries
import random as r
import random
import zipfile
import requests

# External Libraries
import streamlit as st
import geopandas as gpd
import streamlit.components.v1 as c
import src.config as config
from src.paths import DATA_DIR


def robo_avatar_component():
Expand Down Expand Up @@ -51,3 +56,39 @@ def render_cta():
st.write("Let's connect!")
st_button(url="https://twitter.com/carlos_melo_py", label="Twitter", font_awesome_icon="fa-twitter")
st_button(url="http://linkedin.com/in/carlos-melo-data-science/", label="LinkedIn", font_awesome_icon="fa-linkedin")


def render_contact():
    """Render the author's contact section in the Streamlit sidebar."""
    contact_links = """
    Carlos at [sigmoidal.ai](https://sigmoidal.ai/en) | [GitHub](https://github.com/carlosfab) | [Twitter](https://twitter.com/carlos_melo_py) | [YouTube](https://www.youtube.com/@CarlosMeloSigmoidal) | [Instagram](http://instagram.com/carlos_melo.py) | [LinkedIn](http://linkedin.com/in/carlos-melo-data-science/)
    """
    st.sidebar.title("Contact")
    st.sidebar.info(contact_links)


def load_shape_data_file() -> gpd.GeoDataFrame:
    """Download (once) and load the shapefile with the NYC taxi zones.

    The zip archive is fetched from the NYC TLC CDN only if the extracted
    shapefile is not already present on disk, so repeated calls reuse the
    local copy (the app UI advertises the download as a one-time operation).

    Returns:
        gpd.GeoDataFrame: Shape data for the NYC taxi zones.

    Raises:
        Exception: If the shapefile archive cannot be downloaded.
    """
    url_path = "https://d37ci6vzurychx.cloudfront.net/misc/taxi_zones.zip"
    zip_path = DATA_DIR / 'taxi_zones.zip'
    shapefile_path = DATA_DIR / 'taxi_zones' / 'taxi_zones.shp'

    # Skip the network round-trip when the data was already extracted.
    if not shapefile_path.exists():
        # A timeout prevents the Streamlit app from hanging indefinitely
        # on a stalled connection (requests has no default timeout).
        response = requests.get(url_path, timeout=60)

        if response.status_code == 200:
            with open(zip_path, 'wb') as f:
                f.write(response.content)
        else:
            raise Exception(f"Could not download data from {url_path}")

        # Extract the archive next to where it was saved.
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(DATA_DIR / 'taxi_zones')

    # Load and return the shape data.
    return gpd.read_file(shapefile_path)
11 changes: 11 additions & 0 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,18 @@

# Constants
HOPSWORKS_PROJECT_NAME = 'taxi_demand_api'

FEATURE_GROUP_NAME = 'time_series_hourly_feature_group'
FEATURE_GROUP_VERSION = 1

FEATURE_VIEW_NAME = 'time_series_hourly_feature_view'
FEATURE_VIEW_VERSION = 1

N_FEATURES = 24 * 28

MODEL_NAME = "taxi_demand_predictor_next_hour"
MODEL_VERSION = 1

FEATURE_GROUP_MODEL_PREDICTIONS = 'model_predictions_feature_group'
FEATURE_VIEW_MODEL_PREDICTIONS = 'model_predictions_feature_view'
FEATURE_VIEW_MONITORING = 'predictions_vs_actuals_for_monitoring_feature_view'
58 changes: 56 additions & 2 deletions src/feature_store_api.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,56 @@
def get_feature_store():
pass
# --- Import libraries ---
from typing import Optional
import hsfs
import hopsworks

# Import local configurations
import src.config as config

# --- Function Definitions ---


def get_feature_store() -> hsfs.feature_store.FeatureStore:
    """Log in to Hopsworks and return a handle to the project's feature store.

    The Hopsworks project name and API key are read from ``src.config``.

    Returns:
        hsfs.feature_store.FeatureStore: Handle to the project's feature store.
    """
    # Authenticate against Hopsworks with the configured credentials.
    hopsworks_project = hopsworks.login(
        project=config.HOPSWORKS_PROJECT_NAME,
        api_key_value=config.HOPSWORKS_API_KEY,
    )
    return hopsworks_project.get_feature_store()


def get_feature_group(
    name: str,
    version: Optional[int] = 1
) -> hsfs.feature_group.FeatureGroup:
    """Return a handle to the requested feature group in the feature store.

    Args:
        name (str): Name of the feature group.
        version (Optional[int], optional): Version of the feature group.
            Defaults to 1.

    Returns:
        hsfs.feature_group.FeatureGroup: Handle to the feature group.
    """
    # Resolve the feature store first, then look the group up by name/version.
    return get_feature_store().get_feature_group(
        name=name,
        version=version,
    )
Loading

0 comments on commit 783f91c

Please sign in to comment.