DataWaveProject · jatkinson1000 · Mar 18, 2024 · Feb 20, 2024 · Feb 22, 2024 · Feb 22, 2024
diff --git a/README.md b/README.md
@@ -1,3 +1,9 @@
+# Overview
+The repository contains the code to train a neural network to emulate the gravity wave drag (GWD) in the WACCM simulation.
+The code trains a PyTorch feed-forward (FF) network.
+
+
+
 # newCAM-Emulation
 This is a DNN written with PyTorch to Emulate the gravity wave drag (GWD, both zonal and meridional ) in the WACCM Simulation.
 

diff --git a/newCAM_emulation/Model.py b/newCAM_emulation/Model.py
@@ -1,26 +1,36 @@
+"""Neural Network model for the CAM-EM."""
+
 import netCDF4 as nc
 import numpy as np
 import scipy.stats as st
-import xarray as xr
-
 import torch
+import xarray as xr
 from torch import nn
-import torch.nn.utils.prune as prune
-from torch.utils.data import DataLoader
-from torch.utils.data import Dataset
+from torch.nn.utils import prune
+from torch.utils.data import DataLoader, Dataset
+
 
 # Required for feeding the data iinto NN.
 class myDataset(Dataset):
-    def __init__(self, X, Y):
+    """
+    Dataset class for loading features and labels.
 
+    Args:
+        X (numpy.ndarray): Input features.
+        Y (numpy.ndarray): Corresponding labels.
+    """
+
+    def __init__(self, X, Y):
+        """Create an instance of myDataset class."""
         self.features = torch.tensor(X, dtype=torch.float64)
         self.labels = torch.tensor(Y, dtype=torch.float64)
 
     def __len__(self):
+        """Return the number of samples in the dataset."""
         return len(self.features.T)
 
     def __getitem__(self, idx):
-
+        """Return a sample from the dataset."""
         feature = self.features[:, idx]
         label = self.labels[:, idx]
 
@@ -29,12 +39,23 @@ def __getitem__(self, idx):
 
 # The NN model.
 class FullyConnected(nn.Module):
+    """
+    Fully connected neural network model.
+
+    The model consists of multiple fully connected layers with SiLU activation function.
+
+    Attributes
+    ----------
+        linear_stack (torch.nn.Sequential): Sequential container for layers.
+    """
+
     def __init__(self):
+        """Create an instance of FullyConnected NN model."""
         super(FullyConnected, self).__init__()
-        ilev=93
+        ilev = 93
 
         self.linear_stack = nn.Sequential(
-            nn.Linear(8*ilev+4, 500, dtype=torch.float64),
+            nn.Linear(8 * ilev + 4, 500, dtype=torch.float64),
             nn.SiLU(),
             nn.Linear(500, 500, dtype=torch.float64),
             nn.SiLU(),
@@ -58,16 +79,38 @@ def __init__(self):
             nn.SiLU(),
             nn.Linear(500, 500, dtype=torch.float64),
             nn.SiLU(),
-            nn.Linear(500, 2*ilev, dtype=torch.float64),
+            nn.Linear(500, 2 * ilev, dtype=torch.float64),
         )
 
     def forward(self, X):
+        """
+        Forward pass through the network.
 
+        Args:
+            X (torch.Tensor): Input tensor.
+
+        Returns
+        -------
+            torch.Tensor: Output tensor.
+        """
         return self.linear_stack(X)
 
 
 # training loop
 def train_loop(dataloader, model, loss_fn, optimizer):
+    """
+    Training loop.
+
+    Args:
+        dataloader (DataLoader): DataLoader for training data.
+        model (nn.Module): Neural network model.
+        loss_fn (torch.nn.Module): Loss function.
+        optimizer (torch.optim.Optimizer): Optimizer.
+
+    Returns
+    -------
+        float: Average training loss.
+    """
     size = len(dataloader.dataset)
     avg_loss = 0
     for batch, (X, Y) in enumerate(dataloader):
@@ -90,6 +133,18 @@ def train_loop(dataloader, model, loss_fn, optimizer):
 
 # validating loop
 def val_loop(dataloader, model, loss_fn):
+    """
+    Validation loop.
+
+    Args:
+        dataloader (DataLoader): DataLoader for validation data.
+        model (nn.Module): Neural network model.
+        loss_fn (torch.nn.Module): Loss function.
+
+    Returns
+    -------
+        float: Average validation loss.
+    """
     avg_loss = 0
     with torch.no_grad():
         for batch, (X, Y) in enumerate(dataloader):
@@ -101,6 +156,3 @@ def val_loop(dataloader, model, loss_fn):
     avg_loss /= len(dataloader)
 
     return avg_loss
-
-
-
diff --git a/newCAM_emulation/NN_pred.py b/newCAM_emulation/NN_pred.py
@@ -1,23 +1,17 @@
+"""Prediction module for the neural network."""
 
-"""
-The following is an import of PyTorch libraries.
-"""
+import matplotlib.pyplot as plt
+import Model
+import netCDF4 as nc
+import numpy as np
 import torch
-import torch.nn as nn
 import torch.nn.functional as nnF
-from torch.utils.data import DataLoader
 import torchvision
+from loaddata import data_loader, newnorm
+from torch import nn
+from torch.utils.data import DataLoader
 from torchvision import datasets, transforms
 from torchvision.utils import save_image
-import matplotlib.pyplot as plt
-import numpy as np
-import random
-import netCDF4 as nc
-import Model
-from loaddata import newnorm, data_loader
-
-
-
 
 """
 Determine if any GPUs are available
@@ -132,7 +126,7 @@
  VTGWSPEC = np.asarray(F['BVTGWSPEC'][0,:,:])
  VTGWSPEC = newnorm(VTGWSPEC, VTGWSPECm, VTGWSPECs)
 
- 
+
 
  print('shape of PS',np.shape(PS))
  print('shape of Z3',np.shape(Z3))
@@ -146,8 +140,9 @@
  print('shape of UTGWSPEC',np.shape(UTGWSPEC))
  print('shape of VTGWSPEC',np.shape(VTGWSPEC))
 
- x_test,y_test = data_loader (U,V,T, DSE, NM, NETDT, Z3, RHOI, PS,lat,lon,UTGWSPEC, VTGWSPEC)
-
+ x_test,y_test = data_loader (U,V,T, DSE, NM, NETDT, Z3,
+                              RHOI, PS,lat,lon,UTGWSPEC, VTGWSPEC)
+
  print('shape of x_test', np.shape(x_test))
  print('shape of y_test', np.shape(y_test))
 
@@ -166,10 +161,10 @@
  print(np.corrcoef(truth.flatten(), predict.flatten())[0, 1])
  print('shape of truth ',np.shape(truth))
  print('shape of prediction',np.shape(predict))
- 
+
  np.save('./pred_data_' + str(iter) + '.npy', predict)
 
 
 
-     
-  
+
+
diff --git a/newCAM_emulation/loaddata.py b/newCAM_emulation/loaddata.py
@@ -1,10 +1,23 @@
+"""Implementing data loader for training neural network."""
+
 import numpy as np
 
 ilev = 93
 dim_NN =int(8*ilev+4)
 dim_NNout =int(2*ilev)
 
 def newnorm(var, varm, varstd):
+  """Normalizes the input variable(s) using mean and standard deviation.
+
+  Args:
+      var (numpy.ndarray): Input variable(s) to be normalized.
+      varm (numpy.ndarray): Mean of the variable(s).
+      varstd (numpy.ndarray): Standard deviation of the variable(s).
+
+  Returns
+  -------
+      numpy.ndarray: Normalized variable(s).
+  """
   dim=varm.size
   if dim > 1 :
     vara = var - varm[:, :]
@@ -17,11 +30,32 @@ def newnorm(var, varm, varstd):
 
 
 def data_loader (U,V,T, DSE, NM, NETDT, Z3, RHOI, PS, lat, lon, UTGWSPEC, VTGWSPEC):
+  """
+  Loads and preprocesses input data for neural network training.
 
+  Args:
+      U (numpy.ndarray): Zonal wind component.
+      V (numpy.ndarray): Meridional wind component.
+      T (numpy.ndarray): Temperature.
+      DSE (numpy.ndarray): Dry static energy.
+      NM (numpy.ndarray): Brunt-Väisälä (buoyancy) frequency.
+      NETDT (numpy.ndarray): Net heating rate.
+      Z3 (numpy.ndarray): Geopotential height.
+      RHOI (numpy.ndarray): Air density.
+      PS (numpy.ndarray): Surface pressure.
+      lat (numpy.ndarray): Latitude.
+      lon (numpy.ndarray): Longitude.
+      UTGWSPEC (numpy.ndarray): Target zonal gravity wave drag.
+      VTGWSPEC (numpy.ndarray): Target meridional gravity wave drag.
+
+  Returns
+  -------
+      tuple: A tuple containing the input data and target data arrays.
+  """
   Ncol = U.shape[1]
   #Nlon = U.shape[2]
   #Ncol = Nlat*Nlon
-   
+
   x_train = np.zeros([dim_NN,Ncol])
   y_train = np.zeros([dim_NNout,Ncol])