Introducing Learning for Storages (#464)

# Pull Request ## Related Issue Closes #457 ## Description This PR introduces a learning strategy for storages, implemented in accordance with the published strategy by @nick-harder, where the storage decides on the direction and the price level. Convergence has been tested and looks good. In addition, example_02e contains a set of fine-tuned hyperparameters that enable good storage learning performance. ## Changes Proposed - added a new DRL-based bidding strategy for the storage unit - prepared example 02e with a learning storage unit - switched from using SOC in the storage unit as a relative value to an absolute value to avoid confusion - enhanced the docstrings of both learning strategies, for the power plant as well as the storage - adjusted tests and bidding strategies to accommodate the above changes - changed pyomo usage for dmas storages to align with usage in other modules ## Testing General usability as well as learning performance were tested. No multi-agent tests have been done yet; these will come in the following PRs. 
## Checklist Please check all applicable items: - [x] Code changes are sufficiently documented (docstrings, inline comments, `doc` folder updates) - [x] New unit tests added for new features or bug fixes - [x] Existing tests pass with the changes - [x] Reinforcement learning examples are operational (for DRL-related changes) - [x] Code tested with both local and Docker databases - [x] Code follows project style guidelines and best practices - [x] Changes are backwards compatible, or deprecation notices added - [ ] New dependencies added to `pyproject.toml` - [x] A note for the release notes `doc/release_notes.rst` of the upcoming release is included - [x] Consent to release this PR's code under the GNU Affero General Public License v3.0 ## Additional Notes (if applicable) [Any additional information, concerns, or areas you want reviewers to focus on] ## Screenshots (if applicable) @nick-harder ![image](https://github.com/user-attachments/assets/ed84c728-d7fa-45dc-8548-51375e207fe5) @ModdyLP ![image](https://github.com/user-attachments/assets/aa7c30d3-4ba6-4b2a-9dac-c73f6bc47979) --------- Co-authored-by: Nick Harder <[email protected]> Co-authored-by: Florian Maurer <[email protected]>
assume-framework · Nov 5, 2024 · 80b35b3 · 80b35b3
1 parent d853742
commit 80b35b3
Show file tree

Hide file tree

Showing 30 changed files with 36,421 additions and 283 deletions.
diff --git a/assume/common/base.py b/assume/common/base.py
@@ -12,6 +12,11 @@
 from assume.common.forecasts import Forecaster
 from assume.common.market_objects import MarketConfig, Orderbook, Product
 
+try:
+    import torch as th
+except ImportError:
+    th = None
+
 
 class BaseStrategy:
     pass
@@ -527,7 +532,7 @@ class SupportsMinMaxCharge(BaseUnit):
     # negative
     ramp_down_charge: float
     # ramp_down_charge is negative
-    max_volume: float
+    max_soc: float
     efficiency_charge: float
     efficiency_discharge: float
 

diff --git a/assume/scenario/loader_amiris.py b/assume/scenario/loader_amiris.py
@@ -277,8 +277,8 @@ def add_agent_to_world(
                 price_forecast=forecast_price,
             )
 
-            max_volume = device["EnergyToPowerRatio"] * device["InstalledPowerInMW"]
-            initial_soc = 100 * device["InitialEnergyLevelInMWH"] / max_volume
+            max_soc = device["EnergyToPowerRatio"] * device["InstalledPowerInMW"]
+            initial_soc = device["InitialEnergyLevelInMWH"]
             # TODO device["SelfDischargeRatePerHour"]
             world.add_unit(
                 f"StorageTrader_{agent['Id']}",
@@ -290,7 +290,7 @@ def add_agent_to_world(
                     "efficiency_charge": device["ChargingEfficiency"],
                     "efficiency_discharge": device["DischargingEfficiency"],
                     "initial_soc": initial_soc,
-                    "max_volume": max_volume,
+                    "max_soc": max_soc,
                     "bidding_strategies": storage_strategies,
                     "technology": "hydro",  # PSPP? Pump-Storage Power Plant
                     "emission_factor": 0,

diff --git a/assume/scenario/loader_csv.py b/assume/scenario/loader_csv.py
@@ -972,7 +972,12 @@ def run_learning(
             world.run()
 
             total_rewards = world.output_role.get_sum_reward()
+
+            if len(total_rewards) == 0:
+                raise AssumeException("No rewards were collected during evaluation run")
+
             avg_reward = np.mean(total_rewards)
+
             # check reward improvement in evaluation run
             # and store best run in eval folder
             terminate = world.learning_role.compare_and_save_policies(

diff --git a/assume/scenario/loader_pypsa.py b/assume/scenario/loader_pypsa.py
@@ -156,7 +156,7 @@ def load_pypsa(
                 "efficiency_charge": storage.efficiency_store,
                 "efficiency_discharge": storage.efficiency_dispatch,
                 "initial_soc": storage.state_of_charge_initial,
-                "max_volume": storage.p_nom,
+                "max_soc": storage.p_nom,
                 "bidding_strategies": bidding_strategies[storage.name],
                 "technology": "hydro",
                 "emission_factor": 0,

diff --git a/assume/strategies/__init__.py b/assume/strategies/__init__.py
@@ -51,9 +51,13 @@
     from assume.strategies.learning_advanced_orders import (
         RLAdvancedOrderStrategy,
     )
-    from assume.strategies.learning_strategies import RLStrategy
+    from assume.strategies.learning_strategies import (
+        RLStrategy,
+        StorageRLStrategy,
+    )
 
     bidding_strategies["pp_learning"] = RLStrategy
+    bidding_strategies["storage_learning"] = StorageRLStrategy
     bidding_strategies["learning_advanced_orders"] = RLAdvancedOrderStrategy
 
 except ImportError:

diff --git a/assume/strategies/dmas_storage.py b/assume/strategies/dmas_storage.py
@@ -6,15 +6,7 @@
 
 import numpy as np
 import pandas as pd
-from pyomo.environ import (
-    ConcreteModel,
-    ConstraintList,
-    Objective,
-    Reals,
-    Var,
-    maximize,
-    quicksum,
-)
+import pyomo.environ as pyo
 from pyomo.opt import SolverFactory, check_available_solvers
 
 from assume.common.base import BaseStrategy, SupportsMinMaxCharge
@@ -116,7 +108,7 @@ class DmasStorageStrategy(BaseStrategy):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
-        self.model = ConcreteModel("storage")
+        self.model = pyo.ConcreteModel("storage")
         self.opt = get_solver_factory()
 
     def build_model(self, unit: SupportsMinMaxCharge, start: datetime, hour_count: int):
@@ -135,23 +127,24 @@ def build_model(self, unit: SupportsMinMaxCharge, start: datetime, hour_count: i
         self.model.clear()
         time_range = range(hour_count)
 
-        self.model.p_plus = Var(
-            time_range, within=Reals, bounds=(0, -unit.max_power_charge)
+        self.model.p_plus = pyo.Var(
+            time_range, within=pyo.Reals, bounds=(0, -unit.max_power_charge)
         )
-        self.model.p_minus = Var(
-            time_range, within=Reals, bounds=(0, unit.max_power_discharge)
+        self.model.p_minus = pyo.Var(
+            time_range, within=pyo.Reals, bounds=(0, unit.max_power_discharge)
+        )
+        self.model.volume = pyo.Var(
+            time_range, within=pyo.NonNegativeReals, bounds=(0, unit.max_soc)
         )
-        self.model.volume = Var(time_range, within=Reals, bounds=(0, unit.max_volume))
 
         self.power = [
             -self.model.p_minus[t] / unit.efficiency_discharge
             + self.model.p_plus[t] * unit.efficiency_charge
             for t in time_range
         ]
 
-        self.model.vol_con = ConstraintList()
-        soc0 = unit.get_soc_before(start)
-        v0 = unit.max_volume * soc0
+        self.model.vol_con = pyo.ConstraintList()
+        v0 = unit.get_soc_before(start)
 
         for t in time_range:
             if t == 0:
@@ -162,7 +155,7 @@ def build_model(self, unit: SupportsMinMaxCharge, start: datetime, hour_count: i
                 )
 
         # always end with half full SoC
-        self.model.vol_con.add(self.model.volume[hour_count - 1] == unit.max_volume / 2)
+        self.model.vol_con.add(self.model.volume[hour_count - 1] == unit.max_soc / 2)
         return self.power
 
     def optimize(
@@ -194,8 +187,8 @@ def optimize(
             prices = func(base_price.values)
             self.power = self.build_model(unit, start, hour_count)
             profit = [-self.power[t] * prices[t] for t in time_range]
-            self.model.obj = Objective(
-                expr=quicksum(profit[t] for t in time_range), sense=maximize
+            self.model.obj = pyo.Objective(
+                expr=pyo.quicksum(profit[t] for t in time_range), sense=pyo.maximize
             )
             self.opt.solve(self.model)
 

diff --git a/assume/strategies/flexable_storage.py b/assume/strategies/flexable_storage.py
@@ -156,14 +156,10 @@ def calculate_bids(
                     (bid_quantity + current_power)
                     * time_delta
                     / unit.efficiency_discharge
-                    / unit.max_volume
                 )
             elif bid_quantity + current_power < 0:
                 delta_soc = -(
-                    (bid_quantity + current_power)
-                    * time_delta
-                    * unit.efficiency_charge
-                    / unit.max_volume
+                    (bid_quantity + current_power) * time_delta * unit.efficiency_charge
                 )
             else:
                 delta_soc = 0
@@ -263,9 +259,7 @@ def calculate_bids(
 
         previous_power = unit.get_output_before(start)
 
-        min_power_discharge, max_power_discharge = unit.calculate_min_max_discharge(
-            start, end
-        )
+        _, max_power_discharge = unit.calculate_min_max_discharge(start, end)
         bids = []
         theoretic_SOC = unit.outputs["soc"][start]
         for product in product_tuples:
@@ -303,7 +297,7 @@ def calculate_bids(
                     abs(specific_revenue) * unit.min_power_discharge / bid_quantity
                 )
 
-            energy_price = capacity_price / (theoretic_SOC * unit.max_volume)
+            energy_price = capacity_price
 
             if market_config.product_type == "capacity_pos":
                 bids.append(
@@ -335,7 +329,6 @@ def calculate_bids(
                     (bid_quantity + current_power)
                     * time_delta
                     / unit.efficiency_discharge
-                    / unit.max_volume
                 )
                 theoretic_SOC += delta_soc
                 previous_power = bid_quantity + current_power
@@ -395,7 +388,7 @@ def calculate_bids(
 
         theoretic_SOC = unit.outputs["soc"][start]
 
-        min_power_charge, max_power_charge = unit.calculate_min_max_charge(start, end)
+        _, max_power_charge = unit.calculate_min_max_charge(start, end)
 
         bids = []
         for product in product_tuples:
@@ -442,10 +435,7 @@ def calculate_bids(
                 # calculate theoretic SOC
                 time_delta = (end - start) / timedelta(hours=1)
                 delta_soc = (
-                    (bid_quantity + current_power)
-                    * time_delta
-                    * unit.efficiency_charge
-                    / unit.max_volume
+                    (bid_quantity + current_power) * time_delta * unit.efficiency_charge
                 )
                 theoretic_SOC += delta_soc
                 previous_power = bid_quantity + current_power
@@ -519,12 +509,12 @@ def get_specific_revenue(unit, marginal_cost, t, foresight, price_forecast):
             power_discharge=max_power_discharge.iloc[i],
         )
         possible_revenue += (market_price - marginal_cost) * theoretic_power_discharge
-        theoretic_SOC -= theoretic_power_discharge / unit.max_volume
+        theoretic_SOC -= theoretic_power_discharge
         previous_power = theoretic_power_discharge
 
     if soc != theoretic_SOC:
-        possible_revenue = possible_revenue / (soc - theoretic_SOC) / unit.max_volume
+        possible_revenue = possible_revenue / (soc - theoretic_SOC)
     else:
-        possible_revenue = possible_revenue / unit.max_volume
+        possible_revenue = possible_revenue / unit.max_soc
 
     return possible_revenue