Skip to content

Commit

Permalink
chore: gh action debug
Browse files (browse the repository at this point in the history)
  • Loading branch information
BlairCurrey committed Feb 6, 2024
1 parent 8775378 commit 4d12d43
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 14 deletions.
16 changes: 3 additions & 13 deletions nfl_analytics/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from urllib.error import HTTPError
import os
import sqlite3
from pathlib import Path

import pandas as pd

Expand All @@ -23,16 +22,7 @@


def download_data(years=range(1999, 2024)):
# print("gh actions doesnt like os.makedirs with this: ", DATA_DIR_TEST)
# print("ASSET_DIR debug: ", ASSET_DIR)
# data_directory = os.path.join(THIS_DIR, DATA_DIR)
# os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(DATA_DIR, exist_ok=True)
# os.makedirs(data_directory, exist_ok=True)

# this_dir = Path(__file__).resolve().parent
# data_directory = this_dir / DATA_DIR
# data_directory.mkdir(parents=True, exist_ok=True)

for year in years:
# year gets parsed from this filename and depends on this format
Expand All @@ -58,6 +48,7 @@ def load_dataframe_from_remote(years=range(1999, 2024)):
url = f"https://github.com/nflverse/nflverse-data/releases/download/pbp/play_by_play_{year}.csv.gz"
print(f"Reading from remote: {url}")
df = pd.read_csv(url, low_memory=False)
# df = pd.read_csv(url, low_memory=True)

# Save year on dataframe
df["year"] = year
Expand All @@ -70,8 +61,6 @@ def load_dataframe_from_remote(years=range(1999, 2024)):


def load_dataframe_from_raw():
# data_directory = os.path.join(THIS_DIR, DATA_DIR) # was used in place of DATA_DIR below

if not os.path.exists(DATA_DIR):
raise FileNotFoundError(f"Data directory '{DATA_DIR}' not found.")

Expand Down Expand Up @@ -100,7 +89,8 @@ def load_dataframe_from_raw():
print(f"Reading {filename}")
file_path = os.path.join(DATA_DIR, filename)

df = pd.read_csv(file_path, compression="gzip", low_memory=False)
# df = pd.read_csv(file_path, compression="gzip", low_memory=False)
df = pd.read_csv(file_path, compression="gzip", low_memory=True)

# Save year from filename on dataframe
year = get_year_from_filename(filename)
Expand Down
2 changes: 1 addition & 1 deletion nfl_analytics/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,12 @@ def main():
else:
download_data(year_set)
else:
print("Downloading all years...")
download_data()

if args.train:
start_time = time.time()
try:
print("Loading dataframe...")
df_raw = load_dataframe_from_raw()
# df_raw = load_dataframe_from_remote()
except FileNotFoundError as e:
Expand Down

0 comments on commit 4d12d43

Please sign in to comment.