From 96cc2e7d316f78b4edb1fae4fa503673a0bae614 Mon Sep 17 00:00:00 2001
From: Blair Currey <12960453+BlairCurrey@users.noreply.github.com>
Date: Tue, 6 Feb 2024 22:33:06 -0500
Subject: [PATCH] chore: update comment

---
 README.md             | 2 ++
 nfl_analytics/data.py | 7 +------
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index d416906..a47f154 100644
--- a/README.md
+++ b/README.md
@@ -114,6 +114,8 @@ score differential is wrong? look at first game. the number for the 2 teams dont
 - [0] (maybe) if there are any hardcoded paths (like asset dir?), think about how to not hardcode them.
   - punting on this one. not really important to make this configurable.
 - Quality of Life Improvements
+  - [ ] rename model? LinRegSpreadPredictor? at least in the release, not sure if anywhere else
+    - LinReg is descriptive, but it is an implementation detail. Do I want a DecisionTreeSpreadPredictor in the future? Or would I only have a decision-tree-based model if it replaced the lin reg one? Maybe that's a "wait until (if) you actually have another model" problem.
   - [ ] suppress pandas warnings?? "import pandas as pd"
   - [ ] add cli doc generator. look into `argparse.HelpFormatter` to generate a markdown file.
   - [ ] add types
diff --git a/nfl_analytics/data.py b/nfl_analytics/data.py
index 2de0295..3650c03 100644
--- a/nfl_analytics/data.py
+++ b/nfl_analytics/data.py
@@ -88,13 +88,8 @@ def load_dataframe_from_raw():
         print(f"Reading {filename}")
         file_path = os.path.join(DATA_DIR, filename)

-        # TODO: Throws DtypeWarning about mixed types and says "Specify dtype option on import or set low_memory=False.""
-        # However, model training results are unchanged and this is required to run
-        # in gh actions without timing out. Perhaps an alternative solution to gh actions
-        # timeing out would enable using low_memory=False. Like: https://github.com/actions/runner-images/discussions/7188#discussioncomment-6750749
-        # Or maybe using chunksize and iterator? https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
+        # FWIW, low_memory seems to work fine (no change in model performance), but it does warn about differing column types
         df = pd.read_csv(file_path, compression="gzip", low_memory=False)
-        # df = pd.read_csv(file_path, compression="gzip", low_memory=True)

         # Save year from filename on dataframe
         year = get_year_from_filename(filename)
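
The removed TODO floated chunksize/iterator reading as an alternative to the current single read_csv call. A minimal sketch of that idea (the helper name and chunk size are illustrative, not part of the repo):

import pandas as pd

def read_csv_chunked(file_path):
    # Read the gzipped CSV in chunks to keep peak memory low, then combine.
    # With low_memory=False, dtypes are inferred from each whole chunk rather
    # than from small internal blocks.
    chunks = pd.read_csv(
        file_path,
        compression="gzip",
        chunksize=100_000,  # rows per chunk; tune for available memory
        low_memory=False,
    )
    return pd.concat(chunks, ignore_index=True)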
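
For the README item about suppressing pandas warnings, one narrowly scoped option (a sketch, not something this patch adds) is to filter only the mixed-dtype warning rather than silencing pandas globally:

import warnings
from pandas.errors import DtypeWarning

# Ignore only the DtypeWarning raised by read_csv on mixed-type columns;
# other pandas warnings still surface.
warnings.simplefilter("ignore", category=DtypeWarning)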
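
For the README item about a CLI doc generator, one simple approach (function name and output path are hypothetical) is to dump argparse's generated help into a markdown file:

import argparse

def write_cli_doc(parser: argparse.ArgumentParser, out_path: str = "docs/cli.md") -> None:
    # Write the parser's auto-generated help text into a fenced block
    # so it renders cleanly in a markdown file.
    with open(out_path, "w") as f:
        f.write("# CLI reference\n\n```text\n")
        f.write(parser.format_help())
        f.write("```\n")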