# bus13_state_reward.py
# Forked from christiantae/rl-microgrid
# Kevin Moy, 8/6/2020
# Methods to read OpenDSS IEEE 13-bus and determine system state and reward from system states
import win32com.client
import numpy as np
# Base penalty from which the per-zone penalties below are derived:
PENALTY = -200

# Band widths used by the quadratic reward:
#   VOLT_ZONE_WIDTH - width of one voltage zone (scales the quadratic term)
#   VOLT_ACC_WIDTH  - full width of the acceptable (zero-penalty) band around 1
VOLT_ZONE_WIDTH, VOLT_ACC_WIDTH = 0.05, 0.1

# Voltage-zone boundaries used by the step reward, listed from lowest to highest:
ZONE2_LB, ZONE1_LB = 0.90, 0.95
ZONE1_UB, ZONE2_UB = 1.05, 1.10

# Penalty charged per bus found in each zone (zone 2 costs twice zone 1).
# TODO: Tune these hyperparameters
ZONE1_PENALTY, ZONE2_PENALTY = PENALTY, 2 * PENALTY
def get_state(DSSCircuit):
    """Return the bus voltage magnitudes of an OpenDSS circuit as the state vector.

    Currently only used with the IEEE 13-bus OpenDSS model.

    DSSCircuit: object of type DSSObj.ActiveCircuit (COM interface for an
        OpenDSS circuit); only its ``AllBusVmagPu`` attribute is read.
    Returns: new NumPy float array holding the values of ``AllBusVmagPu``.
    """
    # dtype=float keeps the state vector floating-point regardless of how the
    # interface hands the values over (e.g. a tuple of ints).
    return np.array(DSSCircuit.AllBusVmagPu, dtype=float)
def step_reward(sts):
    """Calculate the step-function reward from the states (bus voltages).

    Each bus is classified by its voltage:
      * zone 1: ZONE1_UB <= v < ZONE2_UB, or ZONE2_LB < v <= ZONE1_LB
      * zone 2: v >= ZONE2_UB, or v <= ZONE2_LB
    Buses strictly between ZONE1_LB and ZONE1_UB are not penalized.

    sts: array-like of voltages for each bus (the state vector).
    Returns: num_zone1 * ZONE1_PENALTY + num_zone2 * ZONE2_PENALTY as a
        single number (0 when no bus falls in a penalized zone).
    """
    volts = np.asarray(sts)

    # np.count_nonzero counts matching *elements*. The previous
    # np.size(np.nonzero(mask)) counted index entries instead, which is one
    # per array dimension for every match and so over-counted any input that
    # was not strictly 1-D.
    num_zone1 = (
        np.count_nonzero((volts >= ZONE1_UB) & (volts < ZONE2_UB))
        + np.count_nonzero((volts <= ZONE1_LB) & (volts > ZONE2_LB))
    )
    num_zone2 = (
        np.count_nonzero(volts >= ZONE2_UB)
        + np.count_nonzero(volts <= ZONE2_LB)
    )

    return num_zone1 * ZONE1_PENALTY + num_zone2 * ZONE2_PENALTY
def quad_reward(sts):
    """Calculate the quadratic reward from the states (bus voltages).

    A deadband of total width VOLT_ACC_WIDTH centred on 1 is penalty-free.
    Outside it, each bus contributes

        (PENALTY / VOLT_ZONE_WIDTH**2) * (|v - 1| - VOLT_ACC_WIDTH / 2)**2

    and the contributions are summed over all buses.

    sts: array-like of voltages for each bus (the state vector).
    Returns: single floating-point number as reward.
    """
    # asarray lets plain lists/tuples through; ndarray input is used as-is.
    volts = np.asarray(sts)

    # Distance of each voltage beyond the edge of the deadband (0 inside it).
    excess = np.maximum(np.abs(volts - 1) - VOLT_ACC_WIDTH / 2, 0)

    scale = PENALTY / VOLT_ZONE_WIDTH ** 2
    return np.sum(scale * excess ** 2)