address issues handling large dataset #27

Closed
wants to merge 2 commits into from
Binary file added .DS_Store
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
examples/*
data/*
Binary file added easymore/__pycache__/__init__.cpython-38.pyc
Binary file added easymore/__pycache__/easymore.cpython-38.pyc
122 changes: 77 additions & 45 deletions easymore/easymore.py
@@ -45,6 +45,7 @@ def __init__(self):
self.license = '' # data license
self.tolerance = 10**-5 # tolerance
self.save_csv = False # save csv
self.save_temp = False # option to save temporary shapefiles
self.sort_ID = False # to sort the remapped output based on the target shapefile ID; self.target_shp_ID should be given
self.complevel = 4 # netcdf compression level from 1 to 9. Any other value or object will mean no compression.
self.version = '0.0.2' # version of the easymore
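For context, a minimal usage sketch of the new flag (hedged: the import path and attribute names are taken from this diff and the package layout; everything else is configured as in a normal EASYMORE run):

from easymore.easymore import easymore

esmr = easymore()
esmr.case_name = 'demo'      # prefix for temporary file names
esmr.temp_dir  = './temp/'   # directory where temporary shapefiles would go
esmr.save_temp = False       # new option: skip writing intermediate shapefiles
# ... set source_nc, var_names, target_shp, etc., then:
# esmr.nc_remapper()

With save_temp left at its False default, nc_remapper below keeps every intermediate GeoDataFrame in memory instead of writing it to temp_dir.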
@@ -83,48 +84,54 @@ def nc_remapper(self):
if (self.case == 1 or self.case == 2) and (self.source_shp == ''):
if self.case == 1:
if hasattr(self, 'lat_expanded') and hasattr(self, 'lon_expanded'):
self.lat_lon_SHP(self.lat_expanded, self.lon_expanded,\
source_shp_gpd= self.lat_lon_SHP(self.lat_expanded, self.lon_expanded,\
self.temp_dir+self.case_name+'_source_shapefile.shp')
else:
self.lat_lon_SHP(self.lat, self.lon,\
source_shp_gpd= self.lat_lon_SHP(self.lat, self.lon,\
self.temp_dir+self.case_name+'_source_shapefile.shp')
else:
self.lat_lon_SHP(self.lat, self.lon,\
source_shp_gpd= self.lat_lon_SHP(self.lat, self.lon,\
self.temp_dir+self.case_name+'_source_shapefile.shp')
print('EASYMORE is creating the shapefile from the netCDF file and saving it here:')
print(self.temp_dir+self.case_name+'_source_shapefile.shp')
if self.save_temp:
print('EASYMORE is creating the shapefile from the netCDF file and saving it here:')
print(self.temp_dir+self.case_name+'_source_shapefile.shp')
if (self.case == 1 or self.case == 2) and (self.source_shp != ''):
source_shp_gpd = gpd.read_file(self.source_shp)
source_shp_gpd = self.add_lat_lon_source_SHP(source_shp_gpd, self.source_shp_lat,\
self.source_shp_lon, self.source_shp_ID)
source_shp_gpd.to_file(self.temp_dir+self.case_name+'_source_shapefile.shp')
print('EASYMORE detects the shapefile is provided and will resave it here:')
print(self.temp_dir+self.case_name+'_source_shapefile.shp')
if self.save_temp:
source_shp_gpd.to_file(self.temp_dir+self.case_name+'_source_shapefile.shp')
print('EASYMORE detects the shapefile is provided and will resave it here:')
print(self.temp_dir+self.case_name+'_source_shapefile.shp')
# if case 3 or source shapefile is provided
if (self.case == 3) and (self.source_shp != ''):
self.check_source_nc_shp() # check the lat/lon in the source shapefile and nc file
source_shp_gpd = gpd.read_file(self.source_shp)
source_shp_gpd = self.add_lat_lon_source_SHP(source_shp_gpd, self.source_shp_lat,\
self.source_shp_lon, self.source_shp_ID)
source_shp_gpd.to_file(self.temp_dir+self.case_name+'_source_shapefile.shp')
print('EASYMORE is creating the shapefile from the netCDF file and saving it here:')
print(self.temp_dir+self.case_name+'_source_shapefile.shp')
if self.save_temp:
source_shp_gpd.to_file(self.temp_dir+self.case_name+'_source_shapefile.shp')
print('EASYMORE is creating the shapefile from the netCDF file and saving it here:')
print(self.temp_dir+self.case_name+'_source_shapefile.shp')
# intersection of the source and sink/target shapefile
shp_1 = gpd.read_file(self.temp_dir+self.case_name+'_target_shapefile.shp')
shp_2 = gpd.read_file(self.temp_dir+self.case_name+'_source_shapefile.shp')
shp_1 = target_shp_gpd
shp_2 = source_shp_gpd
# correction of the source and target shapefiles to the -180 to 180 longitude frame
shp_1 = self.shp_lon_correction(shp_1)
shp_2 = self.shp_lon_correction(shp_2)
shp_1.to_file(self.temp_dir+self.case_name+'_target_shapefile_corrected_frame.shp')
shp_2.to_file(self.temp_dir+self.case_name+'_source_shapefile_corrected_frame.shp')
if self.save_temp:
shp_1.to_file(self.temp_dir+self.case_name+'_target_shapefile_corrected_frame.shp')
shp_2.to_file(self.temp_dir+self.case_name+'_source_shapefile_corrected_frame.shp')
# reprojections to equal area
if (str(shp_1.crs).lower() == str(shp_2.crs).lower()) and ('epsg:4326' in str(shp_1.crs).lower()):
shp_1 = shp_1.to_crs ("EPSG:6933") # project to equal area
shp_1.to_file(self.temp_dir+self.case_name+'test.shp')
shp_1 = gpd.read_file(self.temp_dir+self.case_name+'test.shp')
if self.save_temp:
shp_1.to_file(self.temp_dir+self.case_name+'test.shp')
shp_1 = gpd.read_file(self.temp_dir+self.case_name+'test.shp')
shp_2 = shp_2.to_crs ("EPSG:6933") # project to equal area
shp_2.to_file(self.temp_dir+self.case_name+'test.shp')
shp_2 = gpd.read_file(self.temp_dir+self.case_name+'test.shp')
if self.save_temp:
shp_2.to_file(self.temp_dir+self.case_name+'test.shp')
shp_2 = gpd.read_file(self.temp_dir+self.case_name+'test.shp')
# remove test files
removeThese = glob.glob(self.temp_dir+self.case_name+'test.*')
for file in removeThese:
@@ -134,7 +141,8 @@ def nc_remapper(self):
shp_int = self.intersection_shp(shp_1, shp_2)
shp_int = shp_int.sort_values(by=['S_1_ID_t']) # sort based on ID_t
shp_int = shp_int.to_crs ("EPSG:4326") # project back to WGS84
shp_int.to_file(self.temp_dir+self.case_name+'_intersected_shapefile.shp') # save the intersected files
if self.save_temp:
shp_int.to_file(self.temp_dir+self.case_name+'_intersected_shapefile.shp') # save the intersected files
shp_int = shp_int.drop(columns=['geometry']) # remove the geometry
# rename dictionary
dict_rename = {'S_1_ID_t' : 'ID_t',
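The pattern this hunk applies throughout nc_remapper, shown side by side (an illustrative sketch, not a verbatim excerpt; tmp stands for the self.temp_dir + self.case_name paths used above):

# before: every intermediate GeoDataFrame took a disk round-trip
shp.to_file(tmp)           # write temporary shapefile
shp = gpd.read_file(tmp)   # immediately read it back

# after: the GeoDataFrame stays in memory and is written only on request
if self.save_temp:
    shp.to_file(tmp)

For large source grids the shapefile write/read can dominate the runtime, which is what this PR targets.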
@@ -630,24 +638,25 @@ def lat_lon_SHP(self,
# check if the lat/lon inputs have the same dimensions
import geopandas as gpd
from shapely.geometry import Polygon
import shapefile # pyshed library
import shapely
from numba import jit
# import shapefile # pyshed library
# import shapely
#
lat_lon_shape = lat.shape
# write the shapefile
with shapefile.Writer(file_name) as w:
w.autoBalance = 1 # turn on function that keeps file stable if number of shapes and records don't line up
w.field("ID_s",'N') # create (N)umerical attribute fields, integer
w.field("lat_s",'F',decimal=4) # float with 4 decimals
w.field("lon_s",'F',decimal=4) # float with 4 decimals
# preparing m, which is a counter for the shapefile's arbitrary ID
m = 0.00
# iterating to create the shapes of the result shapefile, ignoring the first and last rows and columns
@jit(nopython=True)
def bottleneck(lat: np.ndarray, lon: np.ndarray) -> tuple:
'''
replace the two Python for-loops with a numba JIT-compiled function
'''
lat_lon_shape= lon.shape
geometry = []
records= []
m=0
for i in range(1, lat_lon_shape[0] - 1):
for j in range(1, lat_lon_shape[1] - 1):
# check if lat and lon are located inside the provided box
# empty the polygon variable
parts = []
# update records
m += 1 # ID
center_lat = lat[i,j] # lat value of data point in source .nc file
@@ -670,8 +679,7 @@ def lat_lon_SHP(self,
Lon_LowLeft = (lon[i, j - 1] + lon[i + 1, j - 1] + lon[i + 1, j] + lon[i, j]) / 4
Lon_Left = (lon[i, j - 1] + lon[i, j]) / 2
Lon_UpLeft = (lon[i - 1, j - 1] + lon[i - 1, j] + lon[i, j - 1] + lon[i, j]) / 4
# creating the polygon given the lat and lon
parts.append([ (Lon_Up, Lat_Up),\
geometry.append([ (Lon_Up, Lat_Up),\
(Lon_UpRright, Lat_UpRright), \
(Lon_Right, Lat_Right), \
(Lon_LowRight, Lat_LowRight), \
@@ -680,10 +688,20 @@ def lat_lon_SHP(self,
(Lon_Left, Lat_Left), \
(Lon_UpLeft, Lat_UpLeft), \
(Lon_Up, Lat_Up)])
# store polygon
w.poly(parts)
# update records/fields for the polygon
w.record(m, center_lat, center_lon)
records.append([m, center_lat, center_lon])

return (geometry, records)

geometry, records= bottleneck(lat, lon)
records= np.stack(records)
gdf= gpd.GeoDataFrame(geometry=[Polygon(_geom) for _geom in geometry])
gdf['ID_s']= records[:,0].astype(np.int32)
gdf['lat_s']= records[:,1].astype(np.float32)
gdf['lon_s']= records[:,2].astype(np.float32)
if self.save_temp:
gdf.to_file(file_name)

return gdf
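
A small driver for the reworked lat_lon_SHP (a sketch under assumptions: esmr is the configured instance from the earlier example, and lat/lon are 2-D grids of cell centers as nc_remapper passes them; with save_temp False, file_name is never written):

import numpy as np

# hypothetical 4x4 grid of cell centers; only interior cells get polygons,
# so a 4x4 grid yields a GeoDataFrame with 2x2 = 4 rows
lat = np.repeat(np.linspace(40.0, 43.0, 4), 4).reshape(4, 4)
lon = np.tile(np.linspace(-100.0, -97.0, 4), 4).reshape(4, 4)
gdf = esmr.lat_lon_SHP(lat, lon, esmr.temp_dir + 'demo_source_shapefile.shp')
print(gdf[['ID_s', 'lat_s', 'lon_s']])   # IDs and centers of the 4 cell polygons

The first call pays numba's one-time compilation cost for bottleneck; subsequent calls run the compiled loops.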

def add_lat_lon_source_SHP( self,
shp,
@@ -1365,8 +1383,9 @@ def intersection_shp( self,
shp_2):
import geopandas as gpd
from shapely.geometry import Polygon
import shapefile # pyshed library
import shapely
from numba import jit
# import shapefile # pyshed library
# import shapely
"""
@ author: Shervan Gharari
@ Github: https://github.com/ShervanGharari/EASYMORE
@@ -1389,6 +1408,15 @@ def intersection_shp( self,
result: a geodataframe that includes the intersected shapefile and the area, percent, and normalized percent
of each shape element in the other shapefile
"""
@jit(nopython=True)
def normalize(ids: np.ndarray, unique_ids: np.ndarray,
ap: np.ndarray) -> np.ndarray:
ap_new= ap.copy()
for i in unique_ids:
idx= np.where(ids==i)
ap_new[idx]= ap[idx]/ap[idx].sum()
return ap_new
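
A toy check of what normalize computes (a standalone numpy mirror of the jitted helper above; with two fragments of ID 7 and one of ID 9, the fractions within each ID sum to 1):

import numpy as np

ids = np.array([7, 7, 9])          # IDs of the intersected fragments
ap  = np.array([2.0, 3.0, 5.0])    # their intersected areas/percents
apn = ap.copy()
for i in np.unique(ids):
    idx = np.where(ids == i)
    apn[idx] = ap[idx] / ap[idx].sum()
print(apn)                         # [0.4 0.6 1. ]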

# get the column name of shp_1
column_names = shp_1.columns
column_names = list(column_names)
@@ -1425,18 +1453,22 @@ def intersection_shp( self,
AP1 = np.array(result['AP1'])
AP1N = AP1 # creating the normalized percent area
ID_S1_unique = np.unique(ID_S1) # unique IDs
for i in ID_S1_unique:
INDX = np.where(ID_S1==i) # getting the indeces
AP1N[INDX] = AP1[INDX] / AP1[INDX].sum() # normalizing for that sum
AP1N=normalize(ID_S1, ID_S1_unique, AP1)
# replace the computationally expensive loop with the numba JIT-compiled normalize above
# for i in ID_S1_unique:
# INDX = np.where(ID_S1==i) # getting the indeces
# AP1N[INDX] = AP1[INDX] / AP1[INDX].sum() # normalizing for that sum
# taking part of the data frame as numpy arrays to increase the speed
# finding the IDs from shapefile two
ID_S2 = np.array (result['IDS2'])
AP2 = np.array(result['AP2'])
AP2N = AP2 # creating the normalized percent area
ID_S2_unique = np.unique(ID_S2) # unique IDs
for i in ID_S2_unique:
INDX = np.where(ID_S2==i) # getting the indeces
AP2N[INDX] = AP2[INDX] / AP2[INDX].sum() # normalizing for that sum
AP2N= normalize(ID_S2, ID_S2_unique, AP2)
# replace the computationally expensive loop with the numba JIT-compiled normalize above
# for i in ID_S2_unique:
# INDX = np.where(ID_S2==i) # getting the indeces
# AP2N[INDX] = AP2[INDX] / AP2[INDX].sum() # normalizing for that sum
result ['AP1N'] = AP1N
result ['AP2N'] = AP2N
return result
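
For comparison, the same per-ID normalization can be written as a pandas groupby transform (a sketch assuming the AP1/AP2 and IDS1/IDS2 column names visible above; the PR uses the jitted loop instead so the hot path stays in compiled code):

result['AP1N'] = result['AP1'] / result.groupby('IDS1')['AP1'].transform('sum')
result['AP2N'] = result['AP2'] / result.groupby('IDS2')['AP2'].transform('sum')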
47 changes: 46 additions & 1 deletion requirements.txt
@@ -1 +1,46 @@
.
affine==2.3.0
attrs==21.2.0
certifi==2021.10.8
cftime==1.5.1
click==8.0.3
click-plugins==1.1.1
cligj==0.7.2
cycler==0.10.0
DateTime==4.3
Fiona==1.8.20
GDAL==3.3.2
geojson==2.5.0
geopandas==0.10.2
geovoronoi==0.3.0
imageio==2.9.0
json5==0.9.6
kiwisolver==1.3.2
llvmlite==0.37.0
matplotlib==3.4.3
munch==2.5.0
netCDF4==1.5.7
networkx==2.6.3
numba==0.54.1
numpy==1.20.3
pandas==1.3.4
Pillow==8.4.0
pip==21.2.4
pyparsing==2.4.7
pyproj==3.2.1
pysheds==0.2.7
pyshp==2.1.3
python-dateutil==2.8.2
pytz==2021.3
PyWavelets==1.1.1
rasterio==1.2.10
Rtree==0.9.7
scikit-image==0.18.3
scipy==1.6.3
setuptools==58.0.4
Shapely==1.7.1
six==1.16.0
snuggs==1.4.7
tifffile==2021.10.12
wheel==0.37.0
xarray==0.19.0
zope.interface==5.4.0
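
The numba==0.54.1 and llvmlite==0.37.0 pins are the new JIT dependencies behind the @jit changes above; a quick import check (a sketch) verifies the toolchain in a fresh environment:

import numba, llvmlite
print(numba.__version__, llvmlite.__version__)   # expect 0.54.1 and 0.37.0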