# bus13_state_reward.py

# Kevin Moy, 8/6/2020
# Methods to read OpenDSS IEEE 13-bus and determine system state and reward from system states

import win32com.client
import numpy as np

# Width of voltage zone:
VOLT_ZONE_WIDTH = 0.05

# Width of acceptable voltage:
VOLT_ACC_WIDTH = 0.1

# Base penalty:
PENALTY = -200

# Upper and lower bounds of voltage zones:
ZONE2_UB = 1.10
ZONE1_UB = 1.05
ZONE1_LB = 0.95
ZONE2_LB = 0.90

# Penalties for each zone:
# TODO: Tune these hyperparameters
ZONE1_PENALTY = PENALTY
ZONE2_PENALTY = 2 * PENALTY


def get_state(DSSCircuit):
    # Currently, only takes in IEEE 13 bus OpenDSS as input
    # DSSCircuit: object of type DSSObj.ActiveCircuit (COM interface for OpenDSS Circuit)
    # Returns: NumPy array of voltages for each bus as the state vector

    vmags = DSSCircuit.AllBusVmagPu
    states = np.array(vmags)

    return states


def step_reward(sts):
    # Calculate step-function reward from states (bus voltages)
    # sts: array-like of per-unit voltages (NumPy array, list or tuple, any shape)
    # Returns: total penalty as a single number: ZONE1_PENALTY for every voltage in
    #          zone 1 plus ZONE2_PENALTY for every voltage in zone 2; 0 when every
    #          voltage lies strictly between ZONE1_LB and ZONE1_UB

    # Element-wise comparisons need an ndarray; this also lets callers pass plain sequences
    volts = np.asarray(sts)

    # Zone 1: at or past an inner bound but still strictly inside the outer bounds
    over_zone1 = (volts >= ZONE1_UB) & (volts < ZONE2_UB)
    under_zone1 = (volts <= ZONE1_LB) & (volts > ZONE2_LB)

    # Zone 2: at or past an outer bound
    in_zone2 = (volts >= ZONE2_UB) | (volts <= ZONE2_LB)

    # count_nonzero counts True entries whatever the array's dimensionality;
    # np.size(np.nonzero(mask)) would count each match once per dimension instead.
    # The over/under masks are mutually exclusive, so OR-ing them loses nothing.
    num_zone1 = np.count_nonzero(over_zone1 | under_zone1)
    num_zone2 = np.count_nonzero(in_zone2)

    return num_zone1 * ZONE1_PENALTY + num_zone2 * ZONE2_PENALTY


def quad_reward(sts):
    # Quadratic reward from states (bus voltages), with a deadband around 1.0 p.u.
    # inside which no penalty is applied
    # sts: NumPy array of voltages for each bus as the state vector
    # Returns: sum of the per-voltage quadratic penalties as a single number

    # Scale factor: a voltage VOLT_ZONE_WIDTH beyond the deadband costs exactly PENALTY
    scale = PENALTY / VOLT_ZONE_WIDTH ** 2

    # Distance of each voltage from nominal (1.0 p.u.)
    deviation = np.abs(sts - 1)

    # Portion of the deviation that exceeds the deadband (half the acceptable width
    # on each side of nominal); zero for voltages inside the deadband
    excess = np.maximum(deviation - VOLT_ACC_WIDTH / 2, 0)

    penalties = scale * excess ** 2

    return np.sum(penalties)