Add runnable docker example and fix some bugs
David Stuebe committed Aug 30, 2024
1 parent e84ae80 commit 8af7f51
Showing 12 changed files with 10,066 additions and 76 deletions.
34 changes: 34 additions & 0 deletions .dockerignore
@@ -0,0 +1,34 @@
# Include any files or directories that you don't want to be copied to your
# container here (e.g., local build artifacts, temporary files, etc.).
#
# For more help, visit the .dockerignore file reference guide at
# https://docs.docker.com/engine/reference/builder/#dockerignore-file

**/.DS_Store
**/__pycache__
**/.venv
**/.classpath
**/.dockerignore
**/.env
**/.git
**/.gitignore
**/.project
**/.settings
**/.toolstarget
**/.vs
**/.vscode
**/*.*proj.user
**/*.dbmdl
**/*.jfm
**/bin
**/charts
**/docker-compose*
**/compose*
**/Dockerfile*
**/node_modules
**/npm-debug.log
**/obj
**/secrets.dev.yaml
**/values.dev.yaml
LICENSE
README.md
56 changes: 56 additions & 0 deletions Dockerfile
@@ -0,0 +1,56 @@
# syntax=docker/dockerfile:1

# Comments are provided throughout this file to help you get started.
# If you need more help, visit the Dockerfile reference guide at
# https://docs.docker.com/engine/reference/builder/

ARG PYTHON_VERSION=3.10.14
FROM python:${PYTHON_VERSION}-bullseye AS base

# Prevents Python from writing pyc files.
ENV PYTHONDONTWRITEBYTECODE=1

# Keeps Python from buffering stdout and stderr to avoid situations where
# the application crashes without emitting any logs due to buffering.
ENV PYTHONUNBUFFERED=1

WORKDIR /app

# Create a non-privileged user that the app will run under.
# See https://docs.docker.com/develop/develop-images/dockerfile_best-practices/#user
ARG UID=10001
RUN adduser \
    --disabled-password \
    --gecos "" \
    --home "/nonexistent" \
    --shell "/sbin/nologin" \
    --no-create-home \
    --uid "${UID}" \
    appuser

RUN apt-get -y update
RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC \
    apt-get -y dist-upgrade

RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC \
    apt-get -y install libeccodes-dev libhdf5-serial-dev pkg-config cmake g++-10 gcc-10

# Download dependencies as a separate step to take advantage of Docker's caching.
# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds.
# Leverage a bind mount to requirements.txt to avoid having to copy it into
# this layer.
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=requirements.txt,target=requirements.txt \
    python -m pip install -r requirements.txt

# Switch to the non-privileged user to run the application.
USER appuser

# Copy the source code into the container.
COPY . .

# Expose the port that the application listens on.
EXPOSE 8890

# Run the application.
CMD python -m example_forecast
31 changes: 23 additions & 8 deletions README.md
@@ -19,6 +19,17 @@ the composable model framework. Example PV model construction is shown below.
The API for fit, predict, and metrics is reduced to specifying start and end times for a given location.
The model must construct feature data using column transforms. Having done so, forecasting as a service becomes trivial.
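The start/end/location interface described above can be sketched with a toy model. This is illustrative only: the class name, the trivial mean "estimator", and the synthetic data loader are stand-ins, not the library's actual RegularTimeSeriesModel implementation.

```python
from dataclasses import dataclass, field


@dataclass
class ToyTimeSeriesModel:
    """Toy sketch: fit and predict over a date range for a named location."""

    coeffs: dict = field(default_factory=dict)

    def fit(self, start: str, end: str, location: str) -> None:
        # A real model would fetch meter and weather data for the range,
        # build lag/harmonic features, and train an estimator. Here we
        # just record a per-location mean as a trivial stand-in.
        history = self._load_history(start, end, location)
        self.coeffs[location] = sum(history) / len(history)

    def predict(self, start: str, end: str, location: str) -> list:
        # One prediction per step in the range, using the fitted mean.
        horizon = self._load_history(start, end, location)
        return [self.coeffs[location] for _ in horizon]

    @staticmethod
    def _load_history(start: str, end: str, location: str) -> list:
        # Stand-in for real data loading; returns synthetic hourly values.
        return [1.0, 2.0, 3.0]


model = ToyTimeSeriesModel()
model.fit("2021-01-15", "2021-01-31", "p2ulv18716")
preds = model.predict("2021-02-01", "2021-02-05", "p2ulv18716")
# preds is [2.0, 2.0, 2.0]: the mean of the synthetic history
```

The real library replaces `_load_history` with column transforms over feature data, so callers still only supply a range and a location.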

## Getting started

Users can verify that the code works by running `docker compose up --build`, which executes the example_forecast.py script.
The log output from the container will include feature and weather data as well as predicted values. The script
takes several minutes to run because the weather data is large.

The Dockerfile included in the project runs example_forecast.py, which demonstrates both the machine
learning model for AMI meter forecasting and the physics-based PV model using PySAM. Users can then choose between
their local working environment and the containerized environment to extend and experiment with the time series models
library.
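The workflow above can be run from the repository root (where compose.yaml lives); the follow-up commands are optional conveniences, assuming Docker with Compose v2 is installed:

```shell
# Build the image and start the example service; logs stream to the terminal.
docker compose up --build

# In another terminal, follow the "server" service logs if running detached:
# docker compose logs -f server

# Tear everything down when finished:
# docker compose down
```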

## Installation

This library is designed for use by technical engineers and data scientists. It takes advantage of the Python
@@ -70,8 +81,6 @@ jupyter notebook --NotebookApp.ip=0.0.0.0

This will print a URL, which you can open in your browser. Then open the example notebook and execute the cells in the demonstration to get acquainted with the functionality.

<!-- TODO: add docker executable image in July 2024 -->

## Usage
Models can be composed of mixins for various estimators and forecast processes. These composable
pieces can be put together in different ways to solve many problems. The RegularTimeSeriesModel is the
@@ -159,11 +168,18 @@ using machine learning models like xgboost too.

```python
pv_config = dict(
-    site_config_mapping="RESOURCE_SELF",
-    site_meter_mapping=None,
-    site_latlong_mapping="RESOURCE_SELF",
-    source_mode="12_hour_horizon",
-    lags=None,
+    lags=None,
+    site_config_mapping={
+        "capybara": ["/app/pv_site.json"],
+    },
+    site_latlong_mapping={
+        "capybara": dict(
+            latitude=40.0,
+            longitude=-100.0,
+        ),
+    },
+    site_meter_mapping=None,
+    source_mode="12_hour_horizon",
)

class PVForecastModel(
@@ -186,7 +202,6 @@ Engineers and data scientists commonly use an interactive web-based development
An [example notebook](https://github.com/SETO2243/forecasting/blob/main/example.ipynb) is provided in this GitHub
repository which demonstrates the core capabilities of the time series models library developed for the SETO project.

<!-- TODO: Add screen shot of dockerized output, July 2024-->

## Input Data

16 changes: 16 additions & 0 deletions compose.yaml
@@ -0,0 +1,16 @@
# Comments are provided throughout this file to help you get started.
# If you need more help, visit the Docker compose reference guide at
# https://docs.docker.com/compose/compose-file/

# Here the instructions define your application as a service called "server".
# This service is built from the Dockerfile in the current directory.
# You can add other services your application may depend on here, such as a
# database or a cache. For examples, see the Awesome Compose repository:
# https://github.com/docker/awesome-compose
services:
  server:
    build:
      context: .
    ports:
      - 8890:8890

90 changes: 90 additions & 0 deletions example_forecast.py
@@ -0,0 +1,90 @@
import logging
import numpy as np

from time_series_models.time_series_models import RegularTimeSeriesModel
from time_series_models.processes import AmiHourlyForecast, PVForecast
from time_series_models.estimators import (
    XgbRegressor,
    IdentityRegressor,
)

logger = logging.getLogger(__name__)


def run_forecast_example():
    logger.info(
        "Starting forecast example for AMI meter forecast with XGBoost estimator!",
    )

    class XgbModel(AmiHourlyForecast, XgbRegressor, RegularTimeSeriesModel):
        pass

    config = dict(
        lags=np.array([24, 48, 168], dtype="timedelta64[h]"),
        day_of_week=True,
        harmonics=np.array([24, 168, 365 * 24], dtype="timedelta64[h]"),
        met_vars=["t", "r2"],
        met_horizon=12,
        mapping=dict(p2ulv18716=dict(latitude=35.0, longitude=-75.0)),
    )
    instance = XgbModel(**config)

    instance.fit("2021-01-15", "2021-01-31", "p2ulv18716")

    logger.info("Trained instance: %s", instance.model)

    features_df = instance.features_dataframe("2021-02-01", "2021-02-05", "p2ulv18716")
    logger.info("Features data: %s", features_df)

    predicted_df = instance.predict_dataframe(
        "2021-01-01", "2021-02-05", "p2ulv18716", range=True
    )

    logger.info("Predicted: %s", predicted_df)

    logger.info(
        "Starting forecast example for PV physical forecast!",
    )

    pv_config = dict(
        lags=None,
        site_config_mapping={
            "capybara": ["/app/pv_site.json"],
        },
        site_latlong_mapping={
            "capybara": dict(
                latitude=40.0,
                longitude=-100.0,
            ),
        },
        site_meter_mapping=None,
        source_mode="12_hour_horizon",
    )

    class PVForecastModel(
        PVForecast,
        IdentityRegressor,
        RegularTimeSeriesModel,
    ):
        pass

    pv_instance = PVForecastModel(**pv_config)
    logger.info("PV model: %s", pv_instance.model)

    pv_instance.fit("2021-01-15", "2021-01-16", "capybara")

    pv_hrrr_df = pv_instance.hrrr_fetcher.source_loader(
        np.datetime64("2021-02-01"), np.datetime64("2021-02-05"), "capybara"
    )
    logger.info("PV HRRR Data: %s", pv_hrrr_df)

    pv_df = pv_instance.predict_dataframe("2021-02-01", "2021-02-05", "capybara")
    logger.info("pv predictions: %s", pv_df)


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    run_forecast_example()
    logging.info("All done!")
    exit(0)